ralph_workflow/reducer/event/agent.rs
1// NOTE: split from reducer/event.rs to keep the main file under line limits.
2use super::types::{default_timeout_output_kind, AgentErrorKind, TimeoutOutputKind};
3use crate::agents::AgentRole;
4use crate::executor::ChildProcessInfo;
5use serde::{Deserialize, Serialize};
6
7/// Agent invocation and chain management events.
8///
9/// Events related to agent execution, fallback chains, model switching,
10/// rate limiting, and retry cycles. The agent chain provides fault tolerance
11/// through multiple fallback levels:
12///
13/// 1. Model level: Try different models for the same agent
14/// 2. Agent level: Switch to a fallback agent
15/// 3. Retry cycle: Start over with exponential backoff
16///
17/// # State Transitions
18///
19/// - `InvocationFailed(retriable=true)`: Advances to next model
20/// - `InvocationFailed(retriable=false)`: Typically switches to next agent (policy may vary by kind)
21/// - `RateLimited`: Typically immediate agent switch with prompt preservation
22/// - `ChainExhausted`: Starts new retry cycle
23/// - `InvocationSucceeded`: Clears continuation prompt
24#[derive(Clone, Serialize, Deserialize, Debug)]
25pub enum AgentEvent {
26 /// Agent invocation started.
27 InvocationStarted {
28 /// The role this agent is fulfilling.
29 role: AgentRole,
30 /// The agent being invoked.
31 agent: String,
32 /// The model being used, if specified.
33 model: Option<String>,
34 },
35 /// Agent invocation succeeded.
36 InvocationSucceeded {
37 /// The role this agent fulfilled.
38 role: AgentRole,
39 /// The agent that succeeded.
40 agent: String,
41 },
42 /// Agent invocation failed.
43 InvocationFailed {
44 /// The role this agent was fulfilling.
45 role: AgentRole,
46 /// The agent that failed.
47 agent: String,
48 /// The exit code from the agent process.
49 exit_code: i32,
50 /// The kind of error that occurred.
51 error_kind: AgentErrorKind,
52 /// Whether this error is retriable with the same agent.
53 retriable: bool,
54 },
55 /// Fallback triggered to switch to a different agent.
56 FallbackTriggered {
57 /// The role being fulfilled.
58 role: AgentRole,
59 /// The agent being switched from.
60 from_agent: String,
61 /// The agent being switched to.
62 to_agent: String,
63 },
64 /// Model fallback triggered within the same agent.
65 ModelFallbackTriggered {
66 /// The role being fulfilled.
67 role: AgentRole,
68 /// The agent whose model is changing.
69 agent: String,
70 /// The model being switched from.
71 from_model: String,
72 /// The model being switched to.
73 to_model: String,
74 },
75 /// Retry cycle started (all agents exhausted, starting over).
76 RetryCycleStarted {
77 /// The role being retried.
78 role: AgentRole,
79 /// The cycle number starting.
80 cycle: u32,
81 },
82 /// Agent chain exhausted (no more agents/models to try).
83 ChainExhausted {
84 /// The role whose chain is exhausted.
85 role: AgentRole,
86 },
87 /// Agent chain initialized with available agents.
88 ChainInitialized {
89 /// The role this chain is for.
90 role: AgentRole,
91 /// The agents available in this chain.
92 agents: Vec<String>,
93 /// Maximum number of retry cycles allowed for this chain.
94 max_cycles: u32,
95 /// Base retry-cycle delay in milliseconds.
96 retry_delay_ms: u64,
97 /// Exponential backoff multiplier.
98 backoff_multiplier: f64,
99 /// Maximum backoff delay in milliseconds.
100 max_backoff_ms: u64,
101 },
102 /// Agent hit rate limit (429).
103 ///
104 /// Effects/executors emit this as a *fact* event. The reducer decides
105 /// whether/when to switch agents.
106 RateLimited {
107 /// The role being fulfilled.
108 role: AgentRole,
109 /// The agent that hit the rate limit.
110 agent: String,
111 /// The prompt that was being executed when rate limit was hit.
112 /// This allows the next agent to continue the same work.
113 prompt_context: Option<String>,
114 },
115
116 /// Agent hit authentication failure (401/403).
117 ///
118 /// Effects/executors emit this as a *fact* event. The reducer decides
119 /// whether/when to switch agents.
120 AuthFailed {
121 /// The role being fulfilled.
122 role: AgentRole,
123 /// The agent that failed authentication.
124 agent: String,
125 },
126
127 /// Agent hit an idle timeout.
128 ///
129 /// Emitted as a fact; the reducer decides retry vs fallback based on `output_kind`.
130 /// `NoOutput` triggers immediate agent switch; `PartialOutput` uses the same-agent
131 /// retry budget (same semantics as before this feature).
132 TimedOut {
133 /// The role being fulfilled.
134 role: AgentRole,
135 /// The agent that timed out.
136 agent: String,
137 /// Whether the agent produced any output before timing out.
138 #[serde(default = "default_timeout_output_kind")]
139 output_kind: TimeoutOutputKind,
140 /// Path to the agent's logfile (for context extraction on `PartialOutput` retry).
141 ///
142 /// When `output_kind` is `PartialOutput` and the agent has no session ID,
143 /// this path is used to extract context for the retry prompt.
144 #[serde(default)]
145 logfile_path: Option<String>,
146 /// Child process status when the timeout was enforced.
147 ///
148 /// `None` if no children existed or child checking was disabled.
149 /// When `Some`, contains the child count and cumulative CPU time at timeout.
150 #[serde(default)]
151 child_status_at_timeout: Option<ChildProcessInfo>,
152 },
153
154 /// Session established with agent.
155 ///
156 /// Emitted when an agent response includes a session ID that can be
157 /// used for XSD retry continuation. This enables reusing the same
158 /// session when retrying due to validation failures.
159 SessionEstablished {
160 /// The role this agent is fulfilling.
161 role: AgentRole,
162 /// The agent name.
163 agent: String,
164 /// The session ID returned by the agent.
165 session_id: String,
166 },
167
168 /// XSD validation failed for agent output.
169 ///
170 /// Emitted when agent output cannot be parsed or fails XSD validation.
171 /// Distinct from `OutputValidationFailed` events in phase-specific enums,
172 /// this is the canonical XSD retry trigger that the reducer uses to
173 /// decide whether to retry with the same agent/session or advance the chain.
174 XsdValidationFailed {
175 /// The role whose output failed validation.
176 role: AgentRole,
177 /// The artifact type that failed validation.
178 artifact: crate::reducer::state::ArtifactType,
179 /// Error message from validation.
180 error: String,
181 /// Current XSD retry count for this artifact.
182 retry_count: u32,
183 },
184
185 /// Template rendering failed due to missing required variables or unresolved placeholders.
186 ///
187 /// Emitted when a prompt template cannot be rendered because required variables
188 /// are missing or unresolved placeholders (e.g., `{{VAR}}`) remain in the output.
189 /// The reducer decides fallback policy, typically switching to the next agent.
190 TemplateVariablesInvalid {
191 /// The role whose template failed to render.
192 role: AgentRole,
193 /// The name of the template that failed.
194 template_name: String,
195 /// Variables that were required but not provided.
196 missing_variables: Vec<String>,
197 /// Placeholder patterns that remain unresolved in the rendered output.
198 unresolved_placeholders: Vec<String>,
199 },
200
201 /// Timeout context written to temp file for session-less agent retry.
202 ///
203 /// Emitted when a timeout with meaningful output occurs but the agent doesn't
204 /// support session IDs. The prior context is extracted from the logfile and
205 /// written to a temp file for the retry prompt to reference.
206 TimeoutContextWritten {
207 /// The role this agent is fulfilling.
208 role: AgentRole,
209 /// Source logfile path the context was extracted from.
210 logfile_path: String,
211 /// Target temp file path where context was written.
212 context_path: String,
213 },
214}