ralph_workflow/reducer/event/agent.rs
1// NOTE: split from reducer/event.rs to keep the main file under line limits.
2use super::types::{default_timeout_output_kind, AgentErrorKind, TimeoutOutputKind};
3use crate::agents::{AgentDrain, AgentRole};
4use crate::common::domain_types::{AgentName, ModelName};
5use crate::ChildProcessInfo;
6use serde::{Deserialize, Serialize};
7
8/// Agent invocation and chain management events.
9///
10/// Events related to agent execution, fallback chains, model switching,
11/// rate limiting, and retry cycles. The agent chain provides fault tolerance
12/// through multiple fallback levels:
13///
14/// 1. Model level: Try different models for the same agent
15/// 2. Agent level: Switch to a fallback agent
16/// 3. Retry cycle: Start over with exponential backoff
17///
18/// # State Transitions
19///
20/// - `InvocationFailed(retriable=true)`: Advances to next model
21/// - `InvocationFailed(retriable=false)`: Typically switches to next agent (policy may vary by kind)
22/// - `RateLimited`: Typically immediate agent switch with prompt preservation
23/// - `ChainExhausted`: Starts new retry cycle
24/// - `InvocationSucceeded`: Clears continuation prompt
25#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)]
26pub enum AgentEvent {
27 /// Agent invocation started.
28 InvocationStarted {
29 /// Compatibility role metadata for the active drain.
30 ///
31 /// Runtime routing is drain-owned; reducers use explicit drain state as the
32 /// authoritative consumer identity.
33 role: AgentRole,
34 /// The agent being invoked.
35 agent: AgentName,
36 /// The model being used, if specified.
37 model: Option<ModelName>,
38 },
39 /// Agent invocation succeeded.
40 InvocationSucceeded {
41 /// Compatibility role metadata for the active drain.
42 role: AgentRole,
43 /// The agent that succeeded.
44 agent: AgentName,
45 },
46 /// Agent invocation failed.
47 InvocationFailed {
48 /// Compatibility role metadata for the active drain.
49 role: AgentRole,
50 /// The agent that failed.
51 agent: AgentName,
52 /// The exit code from the agent process.
53 exit_code: i32,
54 /// The kind of error that occurred.
55 error_kind: AgentErrorKind,
56 /// Whether this error is retriable with the same agent.
57 retriable: bool,
58 },
59 /// Fallback triggered to switch to a different agent.
60 FallbackTriggered {
61 /// The role being fulfilled.
62 role: AgentRole,
63 /// The agent being switched from.
64 from_agent: AgentName,
65 /// The agent being switched to.
66 to_agent: AgentName,
67 },
68 /// Model fallback triggered within the same agent.
69 ModelFallbackTriggered {
70 /// The role being fulfilled.
71 role: AgentRole,
72 /// The agent whose model is changing.
73 agent: AgentName,
74 /// The model being switched from.
75 from_model: ModelName,
76 /// The model being switched to.
77 to_model: ModelName,
78 },
79 /// Retry cycle started (all agents exhausted, starting over).
80 RetryCycleStarted {
81 /// The role being retried.
82 role: AgentRole,
83 /// The cycle number starting.
84 cycle: u32,
85 },
86 /// Agent chain exhausted (no more agents/models to try).
87 ChainExhausted {
88 /// The role whose chain is exhausted.
89 role: AgentRole,
90 },
91 /// Agent chain initialized with available agents.
92 ChainInitialized {
93 /// The explicit runtime drain this chain is for.
94 drain: AgentDrain,
95 /// The agents available in this chain.
96 agents: Vec<AgentName>,
97 /// Maximum number of retry cycles allowed for this chain.
98 max_cycles: u32,
99 /// Base retry-cycle delay in milliseconds.
100 retry_delay_ms: u64,
101 /// Exponential backoff multiplier.
102 backoff_multiplier: f64,
103 /// Maximum backoff delay in milliseconds.
104 max_backoff_ms: u64,
105 },
106 /// Agent hit rate limit (429).
107 ///
108 /// Effects/executors emit this as a *fact* event. The reducer decides
109 /// whether/when to switch agents.
110 RateLimited {
111 /// The role being fulfilled.
112 role: AgentRole,
113 /// The agent that hit the rate limit.
114 agent: AgentName,
115 /// The prompt that was being executed when rate limit was hit.
116 /// This allows the next agent to continue the same work.
117 prompt_context: Option<String>,
118 },
119
120 /// Agent hit authentication failure (401/403).
121 ///
122 /// Effects/executors emit this as a *fact* event. The reducer decides
123 /// whether/when to switch agents.
124 AuthFailed {
125 /// The role being fulfilled.
126 role: AgentRole,
127 /// The agent that failed authentication.
128 agent: AgentName,
129 },
130
131 /// Agent hit an idle timeout.
132 ///
133 /// Emitted as a fact; the reducer decides retry vs fallback based on `output_kind`.
134 /// `NoOutput` triggers immediate agent switch; `PartialOutput` uses the same-agent
135 /// retry budget (same semantics as before this feature).
136 TimedOut {
137 /// The role being fulfilled.
138 role: AgentRole,
139 /// The agent that timed out.
140 agent: AgentName,
141 /// Whether the agent produced any output before timing out.
142 #[serde(default = "default_timeout_output_kind")]
143 output_kind: TimeoutOutputKind,
144 /// Path to the agent's logfile (for context extraction on `PartialOutput` retry).
145 ///
146 /// When `output_kind` is `PartialOutput` and the agent has no session ID,
147 /// this path is used to extract context for the retry prompt.
148 #[serde(default)]
149 logfile_path: Option<String>,
150 /// Child process status when the timeout was enforced.
151 ///
152 /// `None` if no children existed or child checking was disabled.
153 /// When `Some`, contains the child count and cumulative CPU time at timeout.
154 #[serde(default)]
155 child_status_at_timeout: Option<ChildProcessInfo>,
156 },
157
158 /// Session established with agent.
159 ///
160 /// Emitted when an agent response includes a session ID that can be
161 /// used for XSD retry continuation. This enables reusing the same
162 /// session when retrying due to validation failures.
163 SessionEstablished {
164 /// The role this agent is fulfilling.
165 role: AgentRole,
166 /// The agent name.
167 agent: AgentName,
168 /// The session ID returned by the agent.
169 session_id: String,
170 },
171
172 /// XSD validation failed for agent output.
173 ///
174 /// Emitted when agent output cannot be parsed or fails XSD validation.
175 /// Distinct from `OutputValidationFailed` events in phase-specific enums,
176 /// this is the canonical XSD retry trigger that the reducer uses to
177 /// decide whether to retry with the same agent/session or advance the chain.
178 XsdValidationFailed {
179 /// The role whose output failed validation.
180 role: AgentRole,
181 /// The artifact type that failed validation.
182 artifact: crate::reducer::state::ArtifactType,
183 /// Error message from validation.
184 error: String,
185 /// Current XSD retry count for this artifact.
186 retry_count: u32,
187 },
188
189 /// Template rendering failed due to missing required variables or unresolved placeholders.
190 ///
191 /// Emitted when a prompt template cannot be rendered because required variables
192 /// are missing or unresolved placeholders (e.g., `{{VAR}}`) remain in the output.
193 /// The reducer decides fallback policy, typically switching to the next agent.
194 TemplateVariablesInvalid {
195 /// The role whose template failed to render.
196 role: AgentRole,
197 /// The name of the template that failed.
198 template_name: String,
199 /// Variables that were required but not provided.
200 missing_variables: Vec<String>,
201 /// Placeholder patterns that remain unresolved in the rendered output.
202 unresolved_placeholders: Vec<String>,
203 },
204
205 /// Timeout context written to temp file for session-less agent retry.
206 ///
207 /// Emitted when a timeout with meaningful output occurs but the agent doesn't
208 /// support session IDs. The prior context is extracted from the logfile and
209 /// written to a temp file for the retry prompt to reference.
210 TimeoutContextWritten {
211 /// The role this agent is fulfilling.
212 role: AgentRole,
213 /// Source logfile path the context was extracted from.
214 logfile_path: String,
215 /// Target temp file path where context was written.
216 context_path: String,
217 },
218}