Skip to main content

tirea_agent_loop/runtime/loop_runner/
config.rs

1use super::tool_exec::ParallelToolExecutor;
2use super::AgentLoopError;
3use crate::contracts::runtime::behavior::NoOpBehavior;
4use crate::contracts::runtime::tool_call::{Tool, ToolDescriptor};
5use crate::contracts::runtime::AgentBehavior;
6use crate::contracts::runtime::ToolExecutor;
7use crate::contracts::RunContext;
8use async_trait::async_trait;
9use genai::chat::ChatOptions;
10use genai::Client;
11use std::collections::HashMap;
12use std::sync::Arc;
13
/// Retry strategy for LLM inference calls.
///
/// All durations are expressed in milliseconds. The [`Default`] policy allows
/// two attempts per model, starts backing off at 250 ms, caps the backoff at
/// 2 s with 20% jitter, and bounds one retry sequence to 10 s.
#[derive(Debug, Clone)]
pub struct LlmRetryPolicy {
    /// Max attempts per model candidate (must be >= 1).
    ///
    /// The bound is a documented expectation; this type does not validate it.
    pub max_attempts_per_model: usize,
    /// Initial backoff for retries in milliseconds.
    pub initial_backoff_ms: u64,
    /// Max backoff cap in milliseconds.
    pub max_backoff_ms: u64,
    /// Random jitter applied around the exponential backoff base, expressed as
    /// a percentage of the computed delay.
    pub backoff_jitter_percent: u8,
    /// Maximum wall-clock retry window in milliseconds for one retry sequence.
    /// When exceeded, no further retry backoff is scheduled.
    pub max_retry_window_ms: Option<u64>,
    /// Retry stream startup failures before any output is emitted.
    pub retry_stream_start: bool,
    /// Max retryable mid-stream recovery attempts across a single run.
    pub max_stream_event_retries: usize,
    /// Consecutive mid-stream failures on one model before escalating to the
    /// next fallback model.
    pub stream_error_fallback_threshold: usize,
}

impl Default for LlmRetryPolicy {
    /// Builds the stock retry policy used when no explicit policy is supplied.
    fn default() -> Self {
        Self {
            max_attempts_per_model: 2,
            initial_backoff_ms: 250,
            max_backoff_ms: 2_000,
            backoff_jitter_percent: 20,
            max_retry_window_ms: Some(10_000),
            retry_stream_start: true,
            max_stream_event_retries: 2,
            stream_error_fallback_threshold: 2,
        }
    }
}
52
53/// Input context passed to per-step tool providers.
54pub struct StepToolInput<'a> {
55    /// Current run context at step boundary.
56    pub state: &'a RunContext,
57}
58
59/// Tool snapshot resolved for one step.
60#[derive(Clone, Default)]
61pub struct StepToolSnapshot {
62    /// Concrete tool map used for this step.
63    pub tools: HashMap<String, Arc<dyn Tool>>,
64    /// Tool descriptors exposed to plugins/LLM for this step.
65    pub descriptors: Vec<ToolDescriptor>,
66}
67
68impl StepToolSnapshot {
69    /// Build a step snapshot from a concrete tool map.
70    pub fn from_tools(tools: HashMap<String, Arc<dyn Tool>>) -> Self {
71        let descriptors = tools
72            .values()
73            .map(|tool| tool.descriptor().clone())
74            .collect();
75        Self { tools, descriptors }
76    }
77}
78
79/// Provider that resolves the tool snapshot for each step.
80#[async_trait]
81pub trait StepToolProvider: Send + Sync {
82    /// Resolve tool map + descriptors for the current step.
83    async fn provide(&self, input: StepToolInput<'_>) -> Result<StepToolSnapshot, AgentLoopError>;
84}
85
86/// Boxed stream of LLM chat events.
87pub type LlmEventStream = std::pin::Pin<
88    Box<dyn futures::Stream<Item = Result<genai::chat::ChatStreamEvent, genai::Error>> + Send>,
89>;
90
91/// Abstraction over LLM inference backends.
92///
93/// The agent loop calls this trait for both non-streaming (`exec_chat_response`)
94/// and streaming (`exec_chat_stream_events`) inference.  The default
95/// implementation ([`GenaiLlmExecutor`]) delegates to `genai::Client`.
96#[async_trait]
97pub trait LlmExecutor: Send + Sync {
98    /// Run a non-streaming chat completion.
99    async fn exec_chat_response(
100        &self,
101        model: &str,
102        chat_req: genai::chat::ChatRequest,
103        options: Option<&genai::chat::ChatOptions>,
104    ) -> genai::Result<genai::chat::ChatResponse>;
105
106    /// Run a streaming chat completion, returning a boxed event stream.
107    async fn exec_chat_stream_events(
108        &self,
109        model: &str,
110        chat_req: genai::chat::ChatRequest,
111        options: Option<&genai::chat::ChatOptions>,
112    ) -> genai::Result<LlmEventStream>;
113
114    /// Stable label for logging / debug output.
115    fn name(&self) -> &'static str;
116}
117
118/// Default LLM executor backed by `genai::Client`.
119#[derive(Clone)]
120pub struct GenaiLlmExecutor {
121    client: Client,
122}
123
124impl GenaiLlmExecutor {
125    pub fn new(client: Client) -> Self {
126        Self { client }
127    }
128}
129
130impl std::fmt::Debug for GenaiLlmExecutor {
131    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
132        f.debug_struct("GenaiLlmExecutor").finish()
133    }
134}
135
136#[async_trait]
137impl LlmExecutor for GenaiLlmExecutor {
138    async fn exec_chat_response(
139        &self,
140        model: &str,
141        chat_req: genai::chat::ChatRequest,
142        options: Option<&ChatOptions>,
143    ) -> genai::Result<genai::chat::ChatResponse> {
144        self.client.exec_chat(model, chat_req, options).await
145    }
146
147    async fn exec_chat_stream_events(
148        &self,
149        model: &str,
150        chat_req: genai::chat::ChatRequest,
151        options: Option<&ChatOptions>,
152    ) -> genai::Result<LlmEventStream> {
153        let resp = self
154            .client
155            .exec_chat_stream(model, chat_req, options)
156            .await?;
157        Ok(Box::pin(resp.stream))
158    }
159
160    fn name(&self) -> &'static str {
161        "genai_client"
162    }
163}
164
165/// Static provider that always returns the same tool map.
166#[derive(Clone, Default)]
167pub struct StaticStepToolProvider {
168    tools: HashMap<String, Arc<dyn Tool>>,
169}
170
171impl StaticStepToolProvider {
172    pub fn new(tools: HashMap<String, Arc<dyn Tool>>) -> Self {
173        Self { tools }
174    }
175}
176
177#[async_trait]
178impl StepToolProvider for StaticStepToolProvider {
179    async fn provide(&self, _input: StepToolInput<'_>) -> Result<StepToolSnapshot, AgentLoopError> {
180        Ok(StepToolSnapshot::from_tools(self.tools.clone()))
181    }
182}
183
// =========================================================================
// Agent — the sole interface the loop sees
// =========================================================================
187
188/// The sole interface the agent loop sees.
189///
190/// `Agent` encapsulates all runtime configuration, execution strategies,
191/// and agent behavior into a single trait. The loop calls methods on this trait
192/// to obtain LLM settings, tool execution strategies, and phase-hook behavior.
193///
194/// # Three-Layer Architecture
195///
196/// - **Loop**: pure engine — calls `Agent` methods, manages lifecycle
197/// - **Agent**: complete agent unit — provides config + behavior
198/// - **AgentOS**: assembly — resolves definitions into `Agent` instances
199///
200/// The default implementation is [`BaseAgent`].
201pub trait Agent: Send + Sync {
202    // --- Identity ---
203
204    /// Unique identifier for this agent.
205    fn id(&self) -> &str;
206
207    // --- LLM Configuration ---
208
209    /// Model identifier (e.g., "gpt-4", "claude-3-opus").
210    fn model(&self) -> &str;
211
212    /// System prompt for the LLM.
213    fn system_prompt(&self) -> &str;
214
215    /// Loop-budget hint (core loop does not enforce this directly).
216    fn max_rounds(&self) -> usize;
217
218    /// Chat options for the LLM.
219    fn chat_options(&self) -> Option<&ChatOptions>;
220
221    /// Fallback model ids used when the primary model fails.
222    fn fallback_models(&self) -> &[String];
223
224    /// Retry policy for LLM inference failures.
225    fn llm_retry_policy(&self) -> &LlmRetryPolicy;
226
227    // --- Execution Strategies ---
228
229    /// Tool execution strategy (parallel, sequential, or custom).
230    fn tool_executor(&self) -> Arc<dyn ToolExecutor>;
231
232    /// Optional per-step tool provider.
233    ///
234    /// When `None`, the loop uses a static provider derived from the tool map.
235    fn step_tool_provider(&self) -> Option<Arc<dyn StepToolProvider>> {
236        None
237    }
238
239    /// Optional LLM executor override.
240    ///
241    /// When `None`, the loop uses [`GenaiLlmExecutor`] with `Client::default()`.
242    fn llm_executor(&self) -> Option<Arc<dyn LlmExecutor>> {
243        None
244    }
245
246    // --- Behavior ---
247
248    /// The agent behavior (phase hooks) dispatched by the loop.
249    fn behavior(&self) -> &dyn AgentBehavior;
250
251    // --- Recovery ---
252
253    /// Registry for deserializing persisted state-action intents during crash recovery.
254    fn state_action_deserializer_registry(
255        &self,
256    ) -> Arc<tirea_contract::runtime::state::StateActionDeserializerRegistry> {
257        Arc::new(tirea_contract::runtime::state::StateActionDeserializerRegistry::new())
258    }
259}
260
261impl std::fmt::Debug for dyn Agent {
262    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
263        f.debug_struct("Agent")
264            .field("id", &self.id())
265            .field("model", &self.model())
266            .field("max_rounds", &self.max_rounds())
267            .field("behavior", &self.behavior().id())
268            .finish()
269    }
270}
271
// =========================================================================
// BaseAgent — the standard Agent implementation
// =========================================================================
275
276/// Standard [`Agent`] implementation.
277///
278/// Bundles all configuration and behavior for running an agent loop.
279/// Constructed by `AgentOS` from an `AgentDefinition`, or directly for tests.
280#[derive(Clone)]
281pub struct BaseAgent {
282    /// Unique identifier for this agent.
283    pub id: String,
284    /// Model identifier (e.g., "gpt-4", "claude-3-opus").
285    pub model: String,
286    /// System prompt for the LLM.
287    pub system_prompt: String,
288    /// Optional loop-budget hint (core loop does not enforce this directly).
289    pub max_rounds: usize,
290    /// Tool execution strategy (parallel, sequential, or custom).
291    pub tool_executor: Arc<dyn ToolExecutor>,
292    /// Chat options for the LLM.
293    pub chat_options: Option<ChatOptions>,
294    /// Fallback model ids used when the primary model fails.
295    pub fallback_models: Vec<String>,
296    /// Retry policy for LLM inference failures.
297    pub llm_retry_policy: LlmRetryPolicy,
298    /// Agent behavior (declarative model).
299    pub behavior: Arc<dyn AgentBehavior>,
300    /// Lattice registry for CRDT-aware conflict resolution.
301    pub lattice_registry: Arc<tirea_state::LatticeRegistry>,
302    /// State scope registry mapping `StateSpec` types to their declared scopes.
303    pub state_scope_registry: Arc<tirea_contract::runtime::state::StateScopeRegistry>,
304    /// Optional per-step tool provider.
305    pub step_tool_provider: Option<Arc<dyn StepToolProvider>>,
306    /// Optional LLM executor override.
307    pub llm_executor: Option<Arc<dyn LlmExecutor>>,
308    /// Registry for deserializing persisted state-action intents during crash recovery.
309    pub state_action_deserializer_registry:
310        Arc<tirea_contract::runtime::state::StateActionDeserializerRegistry>,
311}
312
313impl Default for BaseAgent {
314    fn default() -> Self {
315        Self {
316            id: "default".to_string(),
317            model: "gpt-4o-mini".to_string(),
318            system_prompt: String::new(),
319            max_rounds: 10,
320            tool_executor: Arc::new(ParallelToolExecutor::streaming()),
321            chat_options: Some(
322                ChatOptions::default()
323                    .with_capture_usage(true)
324                    .with_capture_reasoning_content(true)
325                    .with_capture_tool_calls(true),
326            ),
327            fallback_models: Vec::new(),
328            llm_retry_policy: LlmRetryPolicy::default(),
329            behavior: Arc::new(NoOpBehavior),
330            lattice_registry: Arc::new(tirea_state::LatticeRegistry::new()),
331            state_scope_registry: Arc::new(
332                tirea_contract::runtime::state::StateScopeRegistry::new(),
333            ),
334            step_tool_provider: None,
335            llm_executor: None,
336            state_action_deserializer_registry: Arc::new(
337                tirea_contract::runtime::state::StateActionDeserializerRegistry::new(),
338            ),
339        }
340    }
341}
342
343impl std::fmt::Debug for BaseAgent {
344    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
345        f.debug_struct("BaseAgent")
346            .field("id", &self.id)
347            .field("model", &self.model)
348            .field(
349                "system_prompt",
350                &format!("[{} chars]", self.system_prompt.len()),
351            )
352            .field("max_rounds", &self.max_rounds)
353            .field("tool_executor", &self.tool_executor.name())
354            .field("chat_options", &self.chat_options)
355            .field("fallback_models", &self.fallback_models)
356            .field("llm_retry_policy", &self.llm_retry_policy)
357            .field("behavior", &self.behavior.id())
358            .field(
359                "step_tool_provider",
360                &self.step_tool_provider.as_ref().map(|_| "<set>"),
361            )
362            .field(
363                "llm_executor",
364                &self
365                    .llm_executor
366                    .as_ref()
367                    .map(|executor| executor.name())
368                    .unwrap_or("genai_client(default)"),
369            )
370            .finish()
371    }
372}
373
374impl Agent for BaseAgent {
375    fn id(&self) -> &str {
376        &self.id
377    }
378
379    fn model(&self) -> &str {
380        &self.model
381    }
382
383    fn system_prompt(&self) -> &str {
384        &self.system_prompt
385    }
386
387    fn max_rounds(&self) -> usize {
388        self.max_rounds
389    }
390
391    fn chat_options(&self) -> Option<&ChatOptions> {
392        self.chat_options.as_ref()
393    }
394
395    fn fallback_models(&self) -> &[String] {
396        &self.fallback_models
397    }
398
399    fn llm_retry_policy(&self) -> &LlmRetryPolicy {
400        &self.llm_retry_policy
401    }
402
403    fn tool_executor(&self) -> Arc<dyn ToolExecutor> {
404        self.tool_executor.clone()
405    }
406
407    fn step_tool_provider(&self) -> Option<Arc<dyn StepToolProvider>> {
408        self.step_tool_provider.clone()
409    }
410
411    fn llm_executor(&self) -> Option<Arc<dyn LlmExecutor>> {
412        self.llm_executor.clone()
413    }
414
415    fn behavior(&self) -> &dyn AgentBehavior {
416        self.behavior.as_ref()
417    }
418
419    fn state_action_deserializer_registry(
420        &self,
421    ) -> Arc<tirea_contract::runtime::state::StateActionDeserializerRegistry> {
422        self.state_action_deserializer_registry.clone()
423    }
424}
425
426impl BaseAgent {
427    tirea_contract::impl_shared_agent_builder_methods!();
428
429    /// Set tool executor strategy.
430    #[must_use]
431    pub fn with_tool_executor(mut self, executor: Arc<dyn ToolExecutor>) -> Self {
432        self.tool_executor = executor;
433        self
434    }
435
436    /// Set static tool map (wraps in [`StaticStepToolProvider`]).
437    ///
438    /// Prefer passing tools directly to [`run_loop`] / [`run_loop_stream`];
439    /// use this only when you need to set tools via `step_tool_provider`.
440    #[must_use]
441    pub fn with_tools(self, tools: HashMap<String, Arc<dyn Tool>>) -> Self {
442        self.with_step_tool_provider(Arc::new(StaticStepToolProvider::new(tools)))
443    }
444
445    /// Set per-step tool provider.
446    #[must_use]
447    pub fn with_step_tool_provider(mut self, provider: Arc<dyn StepToolProvider>) -> Self {
448        self.step_tool_provider = Some(provider);
449        self
450    }
451
452    /// Set LLM executor.
453    #[must_use]
454    pub fn with_llm_executor(mut self, executor: Arc<dyn LlmExecutor>) -> Self {
455        self.llm_executor = Some(executor);
456        self
457    }
458
459    /// Set the agent behavior (declarative model), replacing any existing behavior.
460    ///
461    /// The loop dispatches all phase hooks exclusively through this behavior.
462    #[must_use]
463    pub fn with_behavior(mut self, behavior: Arc<dyn AgentBehavior>) -> Self {
464        self.behavior = behavior;
465        self
466    }
467
468    /// Check if any behavior is configured.
469    pub fn has_behavior(&self) -> bool {
470        self.behavior.id() != "noop"
471    }
472}