a3s_code_core/
agent.rs

//! Agent Loop Implementation
//!
//! The agent loop handles the core conversation cycle:
//! 1. User sends a prompt
//! 2. LLM generates a response (possibly with tool calls)
//! 3. If tool calls are present, execute them and send the results back
//! 4. Repeat until the LLM returns without tool calls
//!
//! This gives the agent its agentic behavior: the LLM can use tools
//! autonomously to accomplish tasks.
11
12use crate::context::ContextProvider;
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::HookExecutor;
15#[cfg(test)]
16use crate::llm::LlmResponse;
17use crate::llm::{LlmClient, Message, TokenUsage, ToolDefinition};
18use crate::permissions::{PermissionChecker, PermissionPolicy};
19use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
20use crate::prompts::{PlanningMode, SystemPromptSlots};
21use crate::queue::{SessionCommand, SessionQueueConfig};
22use crate::session_lane_queue::SessionLaneQueue;
23use crate::tools::{ToolContext, ToolExecutor};
24use anyhow::Result;
25use async_trait::async_trait;
26use serde::{Deserialize, Serialize};
27use serde_json::Value;
28use std::sync::Arc;
29
30mod completion_runtime;
31mod context_perception;
32mod execution_entry;
33mod execution_mode;
34mod execution_state;
35mod hook_runtime;
36mod llm_turn;
37mod loop_builder;
38mod loop_runtime;
39mod parallel_tool_runtime;
40mod plan_execution;
41mod planning_runtime;
42mod project_context;
43mod prompt_runtime;
44mod queue_forwarder;
45mod telemetry_runtime;
46mod tool_completion_runtime;
47mod tool_execution_runtime;
48mod tool_gate_runtime;
49mod tool_guard_runtime;
50mod tool_memory_runtime;
51mod tool_result_runtime;
52mod tool_turn;
53mod turn_context;
54
/// Upper bound on tool execution rounds before the agent loop stops.
///
/// Used as the default value of `AgentConfig::max_tool_rounds`.
const MAX_TOOL_ROUNDS: usize = 50;
57
58/// Internal agent loop configuration.
59#[derive(Clone)]
60pub(crate) struct AgentConfig {
61    /// Slot-based system prompt customization.
62    ///
63    /// Users can customize specific parts (role, guidelines, response style, extra)
64    /// without overriding the core agentic capabilities. The default agentic core
65    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
66    pub prompt_slots: SystemPromptSlots,
67    pub tools: Vec<ToolDefinition>,
68    pub max_tool_rounds: usize,
69    /// Optional security provider for input taint tracking and output sanitization
70    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
71    /// Optional permission checker for tool execution control
72    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
73    /// Serializable permission policy used to build the checker, when available.
74    pub permission_policy: Option<PermissionPolicy>,
75    /// Optional confirmation manager for HITL (Human-in-the-Loop)
76    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
77    /// Serializable confirmation policy used to build the manager, when available.
78    pub confirmation_policy: Option<crate::hitl::ConfirmationPolicy>,
79    /// Serializable queue configuration used to build the optional command queue.
80    pub queue_config: Option<SessionQueueConfig>,
81    /// Context providers for augmenting prompts with external context
82    pub context_providers: Vec<Arc<dyn ContextProvider>>,
83    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
84    pub planning_mode: PlanningMode,
85    /// Enable goal tracking
86    pub goal_tracking: bool,
87    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
88    pub hook_engine: Option<Arc<dyn HookExecutor>>,
89    /// Optional skill registry for tool permission enforcement
90    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
91    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
92    ///
93    /// When the LLM returns tool arguments with `__parse_error`, the error is
94    /// fed back as a tool result. After this many consecutive parse errors the
95    /// loop bails instead of retrying indefinitely.
96    pub max_parse_retries: u32,
97    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
98    ///
99    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
100    /// A timeout produces an error result sent back to the LLM rather than
101    /// crashing the session.
102    pub tool_timeout_ms: Option<u64>,
103    /// Circuit-breaker threshold: max consecutive LLM API failures before
104    /// aborting (default: 3).
105    ///
106    /// In non-streaming mode, transient LLM failures are retried up to this
107    /// many times (with short exponential backoff) before the loop bails.
108    /// In streaming mode, any failure is fatal (events cannot be replayed).
109    pub circuit_breaker_threshold: u32,
110    /// Max consecutive identical tool signatures before aborting (default: 3).
111    ///
112    /// A tool signature is the exact combination of tool name + compact JSON
113    /// arguments. This prevents the agent from getting stuck repeating the same
114    /// tool call in a loop, for example repeatedly fetching the same URL.
115    pub duplicate_tool_call_threshold: u32,
116    /// Enable auto-compaction when context usage exceeds threshold.
117    pub auto_compact: bool,
118    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
119    /// Default: 0.80 (80%).
120    pub auto_compact_threshold: f32,
121    /// Maximum context window size in tokens (used for auto-compact calculation).
122    /// Default: 200_000.
123    pub max_context_tokens: usize,
124    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
125    pub memory: Option<Arc<crate::memory::AgentMemory>>,
126    /// Inject a continuation message when the LLM stops calling tools before the
127    /// task is complete. Enabled by default. Set to `false` to disable.
128    ///
129    /// When enabled, if the LLM produces a response with no tool calls but the
130    /// response text looks like an intermediate step (not a final answer), the
131    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
132    /// continues for up to `max_continuation_turns` additional turns.
133    pub continuation_enabled: bool,
134    /// Maximum number of continuation injections per execution (default: 3).
135    ///
136    /// Prevents infinite loops when the LLM repeatedly stops without completing.
137    pub max_continuation_turns: u32,
138    /// Maximum execution time in milliseconds (`None` = no timeout).
139    ///
140    /// When set, the entire execution loop is wrapped in a timeout check.
141    /// If execution exceeds this duration, the loop bails with an error.
142    /// This prevents runaway executions that consume excessive API quota.
143    pub max_execution_time_ms: Option<u64>,
144}
145
146impl std::fmt::Debug for AgentConfig {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        f.debug_struct("AgentConfig")
149            .field("prompt_slots", &self.prompt_slots)
150            .field("tools", &self.tools)
151            .field("max_tool_rounds", &self.max_tool_rounds)
152            .field("security_provider", &self.security_provider.is_some())
153            .field("permission_checker", &self.permission_checker.is_some())
154            .field("permission_policy", &self.permission_policy.is_some())
155            .field("confirmation_manager", &self.confirmation_manager.is_some())
156            .field("confirmation_policy", &self.confirmation_policy.is_some())
157            .field("queue_config", &self.queue_config.is_some())
158            .field("context_providers", &self.context_providers.len())
159            .field("planning_mode", &self.planning_mode)
160            .field("goal_tracking", &self.goal_tracking)
161            .field("hook_engine", &self.hook_engine.is_some())
162            .field(
163                "skill_registry",
164                &self.skill_registry.as_ref().map(|r| r.len()),
165            )
166            .field("max_parse_retries", &self.max_parse_retries)
167            .field("tool_timeout_ms", &self.tool_timeout_ms)
168            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
169            .field(
170                "duplicate_tool_call_threshold",
171                &self.duplicate_tool_call_threshold,
172            )
173            .field("auto_compact", &self.auto_compact)
174            .field("auto_compact_threshold", &self.auto_compact_threshold)
175            .field("max_context_tokens", &self.max_context_tokens)
176            .field("continuation_enabled", &self.continuation_enabled)
177            .field("max_continuation_turns", &self.max_continuation_turns)
178            .field("memory", &self.memory.is_some())
179            .finish()
180    }
181}
182
183impl Default for AgentConfig {
184    fn default() -> Self {
185        Self {
186            prompt_slots: SystemPromptSlots::default(),
187            tools: Vec::new(), // Tools are provided by ToolExecutor
188            max_tool_rounds: MAX_TOOL_ROUNDS,
189            security_provider: None,
190            permission_checker: None,
191            permission_policy: None,
192            confirmation_manager: None,
193            confirmation_policy: None,
194            queue_config: None,
195            context_providers: Vec::new(),
196            planning_mode: PlanningMode::default(),
197            goal_tracking: false,
198            hook_engine: None,
199            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
200            max_parse_retries: 2,
201            tool_timeout_ms: None,
202            circuit_breaker_threshold: 3,
203            duplicate_tool_call_threshold: 3,
204            auto_compact: false,
205            auto_compact_threshold: 0.80,
206            max_context_tokens: 200_000,
207            memory: None,
208            continuation_enabled: true,
209            max_continuation_turns: 3,
210            max_execution_time_ms: None,
211        }
212    }
213}
214
215/// Events emitted during agent execution
216///
217/// Subscribe via [`crate::AgentSession::stream`].
218/// New variants may be added in minor releases — always include a wildcard arm
219/// (`_ => {}`) when matching.
220#[derive(Debug, Clone, Serialize, Deserialize)]
221#[serde(tag = "type")]
222#[non_exhaustive]
223pub enum AgentEvent {
224    /// Agent started processing
225    #[serde(rename = "agent_start")]
226    Start { prompt: String },
227
228    /// Runtime agent style/mode selected for the current execution.
229    #[serde(rename = "agent_mode_changed")]
230    AgentModeChanged {
231        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
232        mode: String,
233        /// Canonical built-in agent name associated with this mode.
234        agent: String,
235        /// Human-readable explanation of the selected style.
236        description: String,
237    },
238
239    /// LLM turn started
240    #[serde(rename = "turn_start")]
241    TurnStart { turn: usize },
242
243    /// Text delta from streaming
244    #[serde(rename = "text_delta")]
245    TextDelta { text: String },
246
247    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
248    #[serde(rename = "reasoning_delta")]
249    ReasoningDelta { text: String },
250
251    /// Tool execution started
252    #[serde(rename = "tool_start")]
253    ToolStart { id: String, name: String },
254
255    /// Tool input delta from streaming (partial JSON arguments)
256    #[serde(rename = "tool_input_delta")]
257    ToolInputDelta { delta: String },
258
259    /// Tool execution completed
260    #[serde(rename = "tool_end")]
261    ToolEnd {
262        id: String,
263        name: String,
264        output: String,
265        exit_code: i32,
266        #[serde(skip_serializing_if = "Option::is_none")]
267        metadata: Option<serde_json::Value>,
268    },
269
270    /// Intermediate tool output (streaming delta)
271    #[serde(rename = "tool_output_delta")]
272    ToolOutputDelta {
273        id: String,
274        name: String,
275        delta: String,
276    },
277
278    /// LLM turn completed
279    #[serde(rename = "turn_end")]
280    TurnEnd { turn: usize, usage: TokenUsage },
281
282    /// Agent completed
283    #[serde(rename = "agent_end")]
284    End {
285        text: String,
286        usage: TokenUsage,
287        verification_summary: Box<crate::verification::VerificationSummary>,
288        #[serde(skip_serializing_if = "Option::is_none")]
289        meta: Option<crate::llm::LlmResponseMeta>,
290    },
291
292    /// Error occurred
293    #[serde(rename = "error")]
294    Error { message: String },
295
296    /// Tool execution requires confirmation (HITL)
297    #[serde(rename = "confirmation_required")]
298    ConfirmationRequired {
299        tool_id: String,
300        tool_name: String,
301        args: serde_json::Value,
302        timeout_ms: u64,
303    },
304
305    /// Confirmation received from user (HITL)
306    #[serde(rename = "confirmation_received")]
307    ConfirmationReceived {
308        tool_id: String,
309        approved: bool,
310        reason: Option<String>,
311    },
312
313    /// Confirmation timed out (HITL)
314    #[serde(rename = "confirmation_timeout")]
315    ConfirmationTimeout {
316        tool_id: String,
317        action_taken: String, // "rejected" or "auto_approved"
318    },
319
320    /// External task pending (needs SDK processing)
321    #[serde(rename = "external_task_pending")]
322    ExternalTaskPending {
323        task_id: String,
324        session_id: String,
325        lane: crate::queue::SessionLane,
326        command_type: String,
327        payload: serde_json::Value,
328        timeout_ms: u64,
329    },
330
331    /// External task completed
332    #[serde(rename = "external_task_completed")]
333    ExternalTaskCompleted {
334        task_id: String,
335        session_id: String,
336        success: bool,
337    },
338
339    /// Tool execution denied by permission policy
340    #[serde(rename = "permission_denied")]
341    PermissionDenied {
342        tool_id: String,
343        tool_name: String,
344        args: serde_json::Value,
345        reason: String,
346    },
347
348    /// Context resolution started
349    #[serde(rename = "context_resolving")]
350    ContextResolving { providers: Vec<String> },
351
352    /// Context resolution completed
353    #[serde(rename = "context_resolved")]
354    ContextResolved {
355        total_items: usize,
356        total_tokens: usize,
357    },
358
359    // ========================================================================
360    // a3s-lane integration events
361    // ========================================================================
362    /// Command moved to dead letter queue after exhausting retries
363    #[serde(rename = "command_dead_lettered")]
364    CommandDeadLettered {
365        command_id: String,
366        command_type: String,
367        lane: String,
368        error: String,
369        attempts: u32,
370    },
371
372    /// Command retry attempt
373    #[serde(rename = "command_retry")]
374    CommandRetry {
375        command_id: String,
376        command_type: String,
377        lane: String,
378        attempt: u32,
379        delay_ms: u64,
380    },
381
382    /// Queue alert (depth warning, latency alert, etc.)
383    #[serde(rename = "queue_alert")]
384    QueueAlert {
385        level: String,
386        alert_type: String,
387        message: String,
388    },
389
390    // ========================================================================
391    // Task tracking events
392    // ========================================================================
393    /// Task list updated
394    #[serde(rename = "task_updated")]
395    TaskUpdated {
396        session_id: String,
397        tasks: Vec<crate::planning::Task>,
398    },
399
400    // ========================================================================
401    // Memory System events (Phase 3)
402    // ========================================================================
403    /// Memory stored
404    #[serde(rename = "memory_stored")]
405    MemoryStored {
406        memory_id: String,
407        memory_type: String,
408        importance: f32,
409        tags: Vec<String>,
410    },
411
412    /// Memory recalled
413    #[serde(rename = "memory_recalled")]
414    MemoryRecalled {
415        memory_id: String,
416        content: String,
417        relevance: f32,
418    },
419
420    /// Memories searched
421    #[serde(rename = "memories_searched")]
422    MemoriesSearched {
423        query: Option<String>,
424        tags: Vec<String>,
425        result_count: usize,
426    },
427
428    /// Memory cleared
429    #[serde(rename = "memory_cleared")]
430    MemoryCleared {
431        tier: String, // "long_term", "short_term", "working"
432        count: u64,
433    },
434
435    // ========================================================================
436    // Subagent events
437    // ========================================================================
438    /// Subagent task started
439    #[serde(rename = "subagent_start")]
440    SubagentStart {
441        /// Unique task identifier
442        task_id: String,
443        /// Child session ID
444        session_id: String,
445        /// Parent session ID
446        parent_session_id: String,
447        /// Agent type (e.g., "explore", "general")
448        agent: String,
449        /// Short description of the task
450        description: String,
451    },
452
453    /// Subagent task progress update
454    #[serde(rename = "subagent_progress")]
455    SubagentProgress {
456        /// Task identifier
457        task_id: String,
458        /// Child session ID
459        session_id: String,
460        /// Progress status message
461        status: String,
462        /// Additional metadata
463        metadata: serde_json::Value,
464    },
465
466    /// Subagent task completed
467    #[serde(rename = "subagent_end")]
468    SubagentEnd {
469        /// Task identifier
470        task_id: String,
471        /// Child session ID
472        session_id: String,
473        /// Agent type
474        agent: String,
475        /// Task output/result
476        output: String,
477        /// Whether the task succeeded
478        success: bool,
479    },
480
481    // ========================================================================
482    // Planning and Goal Tracking Events (Phase 1)
483    // ========================================================================
484    /// Planning phase started
485    #[serde(rename = "planning_start")]
486    PlanningStart { prompt: String },
487
488    /// Planning phase completed
489    #[serde(rename = "planning_end")]
490    PlanningEnd {
491        plan: ExecutionPlan,
492        estimated_steps: usize,
493    },
494
495    /// Step execution started
496    #[serde(rename = "step_start")]
497    StepStart {
498        step_id: String,
499        description: String,
500        step_number: usize,
501        total_steps: usize,
502    },
503
504    /// Step execution completed
505    #[serde(rename = "step_end")]
506    StepEnd {
507        step_id: String,
508        status: TaskStatus,
509        step_number: usize,
510        total_steps: usize,
511    },
512
513    /// Goal extracted from prompt
514    #[serde(rename = "goal_extracted")]
515    GoalExtracted { goal: AgentGoal },
516
517    /// Goal progress update
518    #[serde(rename = "goal_progress")]
519    GoalProgress {
520        goal: String,
521        progress: f32,
522        completed_steps: usize,
523        total_steps: usize,
524    },
525
526    /// Goal achieved
527    #[serde(rename = "goal_achieved")]
528    GoalAchieved {
529        goal: String,
530        total_steps: usize,
531        duration_ms: i64,
532    },
533
534    // ========================================================================
535    // Context Compaction events
536    // ========================================================================
537    /// Context automatically compacted due to high usage
538    #[serde(rename = "context_compacted")]
539    ContextCompacted {
540        session_id: String,
541        before_messages: usize,
542        after_messages: usize,
543        percent_before: f32,
544    },
545
546    // ========================================================================
547    // Persistence events
548    // ========================================================================
549    /// Session persistence failed — SDK clients should handle this
550    #[serde(rename = "persistence_failed")]
551    PersistenceFailed {
552        session_id: String,
553        operation: String,
554        error: String,
555    },
556}
557
558/// Result of agent execution
559#[derive(Debug, Clone)]
560pub struct AgentResult {
561    pub text: String,
562    pub messages: Vec<Message>,
563    pub usage: TokenUsage,
564    pub tool_calls_count: usize,
565    pub verification_reports: Vec<crate::verification::VerificationReport>,
566}
567
568impl AgentResult {
569    pub fn verification_summary(&self) -> crate::verification::VerificationSummary {
570        crate::verification::VerificationSummary::from_reports(&self.verification_reports)
571    }
572
573    pub fn verification_summary_text(&self) -> String {
574        crate::verification::format_verification_summary(&self.verification_summary())
575    }
576
577    pub fn has_pending_verification(&self) -> bool {
578        matches!(
579            self.verification_summary().status,
580            crate::verification::VerificationStatus::NeedsReview
581        )
582    }
583}
584
585// ============================================================================
586// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
587// ============================================================================
588
589/// Adapter that implements `SessionCommand` for tool execution via the queue.
590///
591/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
592pub struct ToolCommand {
593    tool_executor: Arc<ToolExecutor>,
594    tool_name: String,
595    tool_args: Value,
596    tool_context: ToolContext,
597    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
598}
599
600impl ToolCommand {
601    /// Create a new ToolCommand
602    pub fn new(
603        tool_executor: Arc<ToolExecutor>,
604        tool_name: String,
605        tool_args: Value,
606        tool_context: ToolContext,
607        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
608    ) -> Self {
609        Self {
610            tool_executor,
611            tool_name,
612            tool_args,
613            tool_context,
614            skill_registry,
615        }
616    }
617}
618
619#[async_trait]
620impl SessionCommand for ToolCommand {
621    async fn execute(&self) -> Result<Value> {
622        // Check skill-based tool permissions
623        if let Some(registry) = &self.skill_registry {
624            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
625
626            // If there are instruction skills with tool restrictions, check permissions
627            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
628
629            if has_restrictions {
630                let mut allowed = false;
631
632                for skill in &instruction_skills {
633                    if skill.is_tool_allowed(&self.tool_name) {
634                        allowed = true;
635                        break;
636                    }
637                }
638
639                if !allowed {
640                    return Err(anyhow::anyhow!(
641                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
642                        self.tool_name
643                    ));
644                }
645            }
646        }
647
648        // Execute the tool
649        let result = self
650            .tool_executor
651            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
652            .await?;
653        Ok(serde_json::json!({
654            "output": result.output,
655            "exit_code": result.exit_code,
656            "metadata": result.metadata,
657        }))
658    }
659
660    fn command_type(&self) -> &str {
661        &self.tool_name
662    }
663
664    fn payload(&self) -> Value {
665        self.tool_args.clone()
666    }
667}
668
669// ============================================================================
670// AgentLoop
671// ============================================================================
672
673/// Internal agent loop executor.
674#[derive(Clone)]
675pub(crate) struct AgentLoop {
676    llm_client: Arc<dyn LlmClient>,
677    tool_executor: Arc<ToolExecutor>,
678    tool_context: ToolContext,
679    config: AgentConfig,
680    /// Optional lane queue for priority-based tool execution
681    command_queue: Option<Arc<SessionLaneQueue>>,
682}
683
684#[cfg(test)]
685mod tests;
686
687#[cfg(test)]
688mod extra_agent_tests;
a3s_code_core/agent.rs

a3s_code_core/
agent.rs