Skip to main content

ai_agents_runtime/optimization/
config.rs

1use serde::{Deserialize, Serialize};
2
3use ai_agents_core::{AgentError, Result};
4
/// Runtime-level configuration for latency optimization.
///
/// The container-level `#[serde(default)]` means fields missing from the
/// serialized form are filled from this type's `Default` value.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct RuntimeConfig {
    /// Policies that reduce latency without changing behavior by default.
    pub optimization: RuntimeOptimizationConfig,
}
12
/// Controls safe pre-response routing, maintenance concurrency, and runtime task limits.
///
/// The container-level `#[serde(default)]` means fields missing from the
/// serialized form are filled from this type's `Default` value.
/// Cross-field constraints are enforced by `validate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct RuntimeOptimizationConfig {
    /// Enables runtime optimization behavior. Disabled agents keep serial behavior.
    ///
    /// `validate` requires this to be `true` whenever a `speculative_*` flag is set.
    pub enabled: bool,
    /// Hard cap for additional speculative LLM calls in one turn.
    ///
    /// Defaults to 0. `validate` requires it to be greater than 0 when any
    /// `speculative_*` flag is set, and never greater than
    /// `max_parallel_runtime_tasks`.
    pub max_speculative_llm_calls_per_turn: u32,
    /// Runs explicitly marked guard and resolved-intent transitions before old-state response generation.
    pub pre_response_deterministic_transitions: bool,
    /// Runs current-state extractors before pre-response transition selection when requested.
    pub pre_response_extractors: bool,
    /// Enables response-independent transition branches beside a draft response.
    pub speculative_state_transitions: bool,
    /// Enables pure skill routing beside a draft response.
    pub speculative_skill_routing: bool,
    /// Enables auto reasoning decisions beside a plain draft response.
    pub speculative_reasoning_auto: bool,
    /// Allows facts and relationship maintenance to run concurrently when configured.
    pub parallel_post_turn_memory: bool,
    /// Allows orchestration vote extraction to run concurrently while preserving order.
    pub parallel_orchestration_vote_extraction: bool,
    /// Reserved for snapshot-based observability export outside the response path.
    ///
    /// `validate` currently rejects `true` for this field.
    pub background_observability_export: bool,
    /// Streaming safety policy used when optimization is enabled.
    pub streaming_policy: StreamingOptimizationPolicy,
    /// Maximum internal runtime tasks scheduled at once.
    ///
    /// Defaults to 4. `validate` rejects 0.
    pub max_parallel_runtime_tasks: usize,
    /// Post-turn maintenance policy for future-turn work.
    pub post_turn: PostTurnOptimizationConfig,
}
44
45impl Default for RuntimeOptimizationConfig {
46    fn default() -> Self {
47        Self {
48            enabled: false,
49            max_speculative_llm_calls_per_turn: 0,
50            pre_response_deterministic_transitions: false,
51            pre_response_extractors: false,
52            speculative_state_transitions: false,
53            speculative_skill_routing: false,
54            speculative_reasoning_auto: false,
55            parallel_post_turn_memory: false,
56            parallel_orchestration_vote_extraction: false,
57            background_observability_export: false,
58            streaming_policy: StreamingOptimizationPolicy::PreflightOnly,
59            max_parallel_runtime_tasks: 4,
60            post_turn: PostTurnOptimizationConfig::default(),
61        }
62    }
63}
64
65impl RuntimeOptimizationConfig {
66    /// Validates optimization settings before the runtime is built.
67    pub fn validate(&self) -> Result<()> {
68        if self.max_parallel_runtime_tasks == 0 {
69            return Err(AgentError::InvalidSpec(
70                "runtime.optimization.max_parallel_runtime_tasks must be greater than 0".into(),
71            ));
72        }
73        if self.post_turn.max_background_tasks == 0 && self.post_turn.any_background_tasks_enabled()
74        {
75            return Err(AgentError::InvalidSpec(
76                "runtime.optimization.post_turn.max_background_tasks must be greater than 0 when background maintenance is enabled".into(),
77            ));
78        }
79        if self.background_observability_export {
80            return Err(AgentError::InvalidSpec(
81                "runtime.optimization.background_observability_export requires snapshot export support and is not enabled yet".into(),
82            ));
83        }
84        let any_speculative = self.speculative_state_transitions
85            || self.speculative_skill_routing
86            || self.speculative_reasoning_auto;
87        if any_speculative {
88            if !self.enabled {
89                return Err(AgentError::InvalidSpec(
90                    "runtime.optimization.enabled must be true when speculative branch settings are enabled".into(),
91                ));
92            }
93            if self.max_speculative_llm_calls_per_turn == 0 {
94                return Err(AgentError::InvalidSpec(
95                    "runtime.optimization.max_speculative_llm_calls_per_turn must be greater than 0 when speculative branch settings are enabled".into(),
96                ));
97            }
98        }
99        if self.max_speculative_llm_calls_per_turn > self.max_parallel_runtime_tasks as u32 {
100            return Err(AgentError::InvalidSpec(
101                "runtime.optimization.max_speculative_llm_calls_per_turn must be less than or equal to max_parallel_runtime_tasks".into(),
102            ));
103        }
104        if self.post_turn.sessions != MaintenanceTaskPolicy::default() {
105            return Err(AgentError::InvalidSpec(
106                "runtime.optimization.post_turn.sessions is reserved until session maintenance scheduling is enabled".into(),
107            ));
108        }
109        if self.post_turn.memory_compression != MaintenanceTaskPolicy::default() {
110            return Err(AgentError::InvalidSpec(
111                "runtime.optimization.post_turn.memory_compression is reserved until compression scheduling is enabled".into(),
112            ));
113        }
114        Ok(())
115    }
116}
117
/// Post-turn task policies for work that affects later turns.
///
/// The container-level `#[serde(default)]` means fields missing from the
/// serialized form are filled from this type's `Default` value.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct PostTurnOptimizationConfig {
    /// Fact extraction policy.
    pub facts: MaintenanceTaskPolicy,
    /// Relationship update policy.
    pub relationships: MaintenanceTaskPolicy,
    /// Session metadata policy.
    ///
    /// Reserved: `RuntimeOptimizationConfig::validate` rejects any value other
    /// than `MaintenanceTaskPolicy::default()`.
    pub sessions: MaintenanceTaskPolicy,
    /// Memory compression policy.
    ///
    /// Reserved: `RuntimeOptimizationConfig::validate` rejects any value other
    /// than `MaintenanceTaskPolicy::default()`.
    pub memory_compression: MaintenanceTaskPolicy,
    /// Maximum number of queued background tasks.
    ///
    /// Defaults to 16. Validation rejects 0 when any task uses
    /// `MaintenanceMode::Background`.
    pub max_background_tasks: usize,
    /// Behavior when the background queue is full.
    pub on_background_overflow: BackgroundOverflowPolicy,
}
135
136impl Default for PostTurnOptimizationConfig {
137    fn default() -> Self {
138        Self {
139            facts: MaintenanceTaskPolicy {
140                mode: MaintenanceMode::InlineSerial,
141                await_before_next_turn: AwaitBeforeNextTurn::Always,
142            },
143            relationships: MaintenanceTaskPolicy {
144                mode: MaintenanceMode::InlineSerial,
145                await_before_next_turn: AwaitBeforeNextTurn::Always,
146            },
147            sessions: MaintenanceTaskPolicy::default(),
148            memory_compression: MaintenanceTaskPolicy::default(),
149            max_background_tasks: 16,
150            on_background_overflow: BackgroundOverflowPolicy::RunInline,
151        }
152    }
153}
154
155impl PostTurnOptimizationConfig {
156    /// Returns true when any maintenance task may run outside the response path.
157    pub fn any_background_tasks_enabled(&self) -> bool {
158        matches!(self.facts.mode, MaintenanceMode::Background)
159            || matches!(self.relationships.mode, MaintenanceMode::Background)
160            || matches!(self.sessions.mode, MaintenanceMode::Background)
161            || matches!(self.memory_compression.mode, MaintenanceMode::Background)
162    }
163}
164
/// Policy for one post-turn maintenance task.
///
/// The container-level `#[serde(default)]` means fields missing from the
/// serialized form are filled from this type's `Default` value
/// (`InlineSerial` mode, `Always` awaited).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct MaintenanceTaskPolicy {
    /// Whether the task runs serially, concurrently, or in the background.
    pub mode: MaintenanceMode,
    /// Whether a later turn waits for pending background work.
    pub await_before_next_turn: AwaitBeforeNextTurn,
}
174
175impl Default for MaintenanceTaskPolicy {
176    fn default() -> Self {
177        Self {
178            mode: MaintenanceMode::InlineSerial,
179            await_before_next_turn: AwaitBeforeNextTurn::Always,
180        }
181    }
182}
183
/// Execution mode for post-turn maintenance.
///
/// Variant names are (de)serialized in snake_case (`inline_serial`,
/// `inline_parallel`, `background`). The default is `InlineSerial`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum MaintenanceMode {
    /// Run in the existing serial response path.
    InlineSerial,
    /// Run with other independent maintenance tasks and await completion.
    InlineParallel,
    /// Queue work after the response and apply freshness policy later.
    Background,
}
195
196impl Default for MaintenanceMode {
197    fn default() -> Self {
198        Self::InlineSerial
199    }
200}
201
/// Freshness policy for pending background maintenance.
///
/// Variant names are (de)serialized in snake_case (`never`, `same_actor`,
/// `always`). The default is `Always`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AwaitBeforeNextTurn {
    /// Never wait for this task before a new turn.
    Never,
    /// Wait before a turn from the same actor.
    SameActor,
    /// Wait before every new turn.
    Always,
}
213
214impl Default for AwaitBeforeNextTurn {
215    fn default() -> Self {
216        Self::Always
217    }
218}
219
/// Behavior when a background queue cannot accept more tasks.
///
/// Variant names are (de)serialized in snake_case (`run_inline`, `drop`,
/// `error`). The default is `RunInline`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BackgroundOverflowPolicy {
    /// Run the task inline instead of dropping it.
    RunInline,
    /// Drop the task and record skipped maintenance.
    Drop,
    /// Return an error to the caller.
    Error,
}
231
232impl Default for BackgroundOverflowPolicy {
233    fn default() -> Self {
234        Self::RunInline
235    }
236}
237
/// Streaming behavior when optimized routing may run before output begins.
///
/// Variant names are (de)serialized in snake_case (`preflight_only`,
/// `buffer_until_routing_done`, `disabled`). The default is `PreflightOnly`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StreamingOptimizationPolicy {
    /// Run safe preflight routing before opening the stream.
    PreflightOnly,
    /// Buffer unresolved stream output until routing decisions finish.
    BufferUntilRoutingDone,
    /// Disable optimized streaming behavior.
    Disabled,
}
249
250impl Default for StreamingOptimizationPolicy {
251    fn default() -> Self {
252        Self::PreflightOnly
253    }
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration with only the master switch turned on.
    fn enabled_config() -> RuntimeOptimizationConfig {
        RuntimeOptimizationConfig {
            enabled: true,
            ..Default::default()
        }
    }

    #[test]
    fn accepts_default_config() {
        assert!(RuntimeOptimizationConfig::default().validate().is_ok());
    }

    #[test]
    fn accepts_buffered_streaming_policy() {
        let config = RuntimeOptimizationConfig {
            enabled: true,
            streaming_policy: StreamingOptimizationPolicy::BufferUntilRoutingDone,
            ..Default::default()
        };
        assert!(config.validate().is_ok());
    }

    #[test]
    fn rejects_zero_parallel_task_limit() {
        let config = RuntimeOptimizationConfig {
            enabled: true,
            max_parallel_runtime_tasks: 0,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn background_task_requires_queue_capacity() {
        let mut config = enabled_config();
        config.post_turn.facts.mode = MaintenanceMode::Background;
        // The default queue capacity is non-zero, so background facts pass.
        assert!(config.validate().is_ok());

        config.post_turn.max_background_tasks = 0;
        assert!(config.validate().is_err());
    }

    #[test]
    fn zero_queue_capacity_is_fine_without_background_tasks() {
        let mut config = enabled_config();
        config.post_turn.max_background_tasks = 0;
        assert!(config.validate().is_ok());
    }

    #[test]
    fn rejects_reserved_observability_export() {
        let config = RuntimeOptimizationConfig {
            enabled: true,
            background_observability_export: true,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn rejects_reserved_session_maintenance_policy() {
        let mut config = enabled_config();
        config.post_turn.sessions.mode = MaintenanceMode::Background;
        assert!(config.validate().is_err());
    }

    #[test]
    fn rejects_reserved_compression_maintenance_policy() {
        let mut config = enabled_config();
        config.post_turn.memory_compression.mode = MaintenanceMode::Background;
        assert!(config.validate().is_err());
    }

    #[test]
    fn speculative_flags_require_enabled() {
        let config = RuntimeOptimizationConfig {
            enabled: false,
            speculative_skill_routing: true,
            max_speculative_llm_calls_per_turn: 1,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn speculative_flags_require_positive_cap() {
        let config = RuntimeOptimizationConfig {
            enabled: true,
            speculative_skill_routing: true,
            max_speculative_llm_calls_per_turn: 0,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn speculative_cap_must_fit_parallel_limit() {
        let config = RuntimeOptimizationConfig {
            enabled: true,
            speculative_skill_routing: true,
            max_speculative_llm_calls_per_turn: 5,
            max_parallel_runtime_tasks: 4,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn speculative_cap_equal_to_parallel_limit_is_accepted() {
        let config = RuntimeOptimizationConfig {
            enabled: true,
            speculative_skill_routing: true,
            max_speculative_llm_calls_per_turn: 4,
            max_parallel_runtime_tasks: 4,
            ..Default::default()
        };
        assert!(config.validate().is_ok());
    }
}