Skip to main content

hematite/agent/
recovery_recipes.rs

1use std::collections::HashMap;
2
3#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
4#[serde(rename_all = "snake_case")]
5pub enum RecoveryScenario {
6    ProviderDegraded,
7    EmptyModelResponse,
8    ContextWindow,
9    PromptBudgetPressure,
10    HistoryPressure,
11    McpWorkspaceReadBlocked,
12    CurrentPlanScopeBlocked,
13    RecentFileEvidenceMissing,
14    ExactLineWindowRequired,
15    ToolLoop,
16    VerificationFailed,
17}
18
19impl RecoveryScenario {
20    pub fn label(self) -> &'static str {
21        match self {
22            RecoveryScenario::ProviderDegraded => "provider_degraded",
23            RecoveryScenario::EmptyModelResponse => "empty_model_response",
24            RecoveryScenario::ContextWindow => "context_window",
25            RecoveryScenario::PromptBudgetPressure => "prompt_budget_pressure",
26            RecoveryScenario::HistoryPressure => "history_pressure",
27            RecoveryScenario::McpWorkspaceReadBlocked => "mcp_workspace_read_blocked",
28            RecoveryScenario::CurrentPlanScopeBlocked => "current_plan_scope_blocked",
29            RecoveryScenario::RecentFileEvidenceMissing => "recent_file_evidence_missing",
30            RecoveryScenario::ExactLineWindowRequired => "exact_line_window_required",
31            RecoveryScenario::ToolLoop => "tool_loop",
32            RecoveryScenario::VerificationFailed => "verification_failed",
33        }
34    }
35}
36
37#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
38#[serde(rename_all = "snake_case")]
39pub enum RecoveryStep {
40    RetryOnce,
41    RefreshRuntimeProfile,
42    ReducePromptBudget,
43    CompactHistory,
44    NarrowRequest,
45    UseBuiltinWorkspaceTools,
46    StayOnPlannedFiles,
47    InspectTargetFile,
48    InspectExactLineWindow,
49    StopRepeatingToolPattern,
50    FixVerificationFailure,
51}
52
53impl RecoveryStep {
54    pub fn label(self) -> &'static str {
55        match self {
56            RecoveryStep::RetryOnce => "retry_once",
57            RecoveryStep::RefreshRuntimeProfile => "refresh_runtime_profile",
58            RecoveryStep::ReducePromptBudget => "reduce_prompt_budget",
59            RecoveryStep::CompactHistory => "compact_history",
60            RecoveryStep::NarrowRequest => "narrow_request",
61            RecoveryStep::UseBuiltinWorkspaceTools => "use_builtin_workspace_tools",
62            RecoveryStep::StayOnPlannedFiles => "stay_on_planned_files",
63            RecoveryStep::InspectTargetFile => "inspect_target_file",
64            RecoveryStep::InspectExactLineWindow => "inspect_exact_line_window",
65            RecoveryStep::StopRepeatingToolPattern => "stop_repeating_tool_pattern",
66            RecoveryStep::FixVerificationFailure => "fix_verification_failure",
67        }
68    }
69}
70
71#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
72pub struct RecoveryRecipe {
73    pub scenario: RecoveryScenario,
74    pub steps: Vec<RecoveryStep>,
75    pub max_attempts: u32,
76}
77
78impl RecoveryRecipe {
79    pub fn steps_summary(&self) -> String {
80        self.steps
81            .iter()
82            .map(|step| step.label())
83            .collect::<Vec<_>>()
84            .join(" -> ")
85    }
86}
87
88#[derive(Debug, Clone, PartialEq, Eq)]
89pub struct RecoveryPlan {
90    pub recipe: RecoveryRecipe,
91    pub next_attempt: u32,
92}
93
94impl RecoveryPlan {
95    pub fn summary(&self) -> String {
96        format!(
97            "{} [{}/{}]: {}",
98            self.recipe.scenario.label(),
99            self.next_attempt,
100            self.recipe.max_attempts.max(1),
101            self.recipe.steps_summary()
102        )
103    }
104}
105
106#[derive(Debug, Clone, PartialEq, Eq)]
107pub enum RecoveryDecision {
108    Attempt(RecoveryPlan),
109    Escalate {
110        recipe: RecoveryRecipe,
111        attempts_made: u32,
112        reason: String,
113    },
114}
115
116impl RecoveryDecision {
117    pub fn summary(&self) -> String {
118        match self {
119            RecoveryDecision::Attempt(plan) => format!("attempt {}", plan.summary()),
120            RecoveryDecision::Escalate {
121                recipe,
122                attempts_made,
123                reason,
124            } => format!(
125                "escalate {} after {}/{}: {} ({})",
126                recipe.scenario.label(),
127                attempts_made,
128                recipe.max_attempts.max(1),
129                recipe.steps_summary(),
130                reason
131            ),
132        }
133    }
134}
135
136#[derive(Debug, Clone, Default)]
137pub struct RecoveryContext {
138    attempts: HashMap<RecoveryScenario, u32>,
139    /// Total transient provider retries consumed this turn across all inference calls.
140    transient_retries_this_turn: u32,
141}
142
143/// Maximum transient provider retries allowed across an entire multi-step turn.
144const MAX_TRANSIENT_RETRIES_PER_TURN: u32 = 3;
145
146impl RecoveryContext {
147    pub fn clear(&mut self) {
148        self.attempts.clear();
149        self.transient_retries_this_turn = 0;
150    }
151
152    pub fn attempt_count(&self, scenario: RecoveryScenario) -> u32 {
153        self.attempts.get(&scenario).copied().unwrap_or(0)
154    }
155
156    /// Returns true and increments the turn-level transient retry budget if a retry
157    /// is still available. Returns false when the budget is exhausted.
158    pub fn consume_transient_retry(&mut self) -> bool {
159        if self.transient_retries_this_turn < MAX_TRANSIENT_RETRIES_PER_TURN {
160            self.transient_retries_this_turn += 1;
161            // Reset the per-scenario counter so attempt_recovery allows the attempt.
162            self.attempts.remove(&RecoveryScenario::ProviderDegraded);
163            self.attempts.remove(&RecoveryScenario::EmptyModelResponse);
164            true
165        } else {
166            false
167        }
168    }
169}
170
171pub fn recipe_for(scenario: RecoveryScenario) -> RecoveryRecipe {
172    match scenario {
173        RecoveryScenario::ProviderDegraded => RecoveryRecipe {
174            scenario,
175            steps: vec![RecoveryStep::RetryOnce],
176            max_attempts: 1,
177        },
178        RecoveryScenario::EmptyModelResponse => RecoveryRecipe {
179            scenario,
180            steps: vec![RecoveryStep::RetryOnce],
181            max_attempts: 1,
182        },
183        RecoveryScenario::ContextWindow => RecoveryRecipe {
184            scenario,
185            steps: vec![
186                RecoveryStep::RefreshRuntimeProfile,
187                RecoveryStep::ReducePromptBudget,
188                RecoveryStep::CompactHistory,
189                RecoveryStep::NarrowRequest,
190            ],
191            max_attempts: 1,
192        },
193        RecoveryScenario::PromptBudgetPressure => RecoveryRecipe {
194            scenario,
195            steps: vec![RecoveryStep::ReducePromptBudget],
196            max_attempts: 1,
197        },
198        RecoveryScenario::HistoryPressure => RecoveryRecipe {
199            scenario,
200            steps: vec![RecoveryStep::CompactHistory],
201            max_attempts: 1,
202        },
203        RecoveryScenario::McpWorkspaceReadBlocked => RecoveryRecipe {
204            scenario,
205            steps: vec![RecoveryStep::UseBuiltinWorkspaceTools],
206            max_attempts: 1,
207        },
208        RecoveryScenario::CurrentPlanScopeBlocked => RecoveryRecipe {
209            scenario,
210            steps: vec![RecoveryStep::StayOnPlannedFiles],
211            max_attempts: 1,
212        },
213        RecoveryScenario::RecentFileEvidenceMissing => RecoveryRecipe {
214            scenario,
215            steps: vec![RecoveryStep::InspectTargetFile],
216            max_attempts: 1,
217        },
218        RecoveryScenario::ExactLineWindowRequired => RecoveryRecipe {
219            scenario,
220            steps: vec![RecoveryStep::InspectExactLineWindow],
221            max_attempts: 1,
222        },
223        RecoveryScenario::ToolLoop => RecoveryRecipe {
224            scenario,
225            steps: vec![
226                RecoveryStep::StopRepeatingToolPattern,
227                RecoveryStep::NarrowRequest,
228            ],
229            max_attempts: 1,
230        },
231        RecoveryScenario::VerificationFailed => RecoveryRecipe {
232            scenario,
233            steps: vec![RecoveryStep::FixVerificationFailure],
234            max_attempts: 1,
235        },
236    }
237}
238
239pub fn plan_recovery(scenario: RecoveryScenario, ctx: &RecoveryContext) -> RecoveryPlan {
240    let recipe = recipe_for(scenario);
241    RecoveryPlan {
242        recipe,
243        next_attempt: ctx.attempt_count(scenario).saturating_add(1),
244    }
245}
246
247pub fn preview_recovery_decision(
248    scenario: RecoveryScenario,
249    ctx: &RecoveryContext,
250) -> RecoveryDecision {
251    let recipe = recipe_for(scenario);
252    let attempts = ctx.attempt_count(scenario);
253    if attempts >= recipe.max_attempts {
254        let max_attempts = recipe.max_attempts.max(1);
255        RecoveryDecision::Escalate {
256            recipe,
257            attempts_made: attempts,
258            reason: format!("max recovery attempts ({}) exhausted", max_attempts),
259        }
260    } else {
261        RecoveryDecision::Attempt(RecoveryPlan {
262            recipe,
263            next_attempt: attempts.saturating_add(1),
264        })
265    }
266}
267
268pub fn attempt_recovery(scenario: RecoveryScenario, ctx: &mut RecoveryContext) -> RecoveryDecision {
269    match preview_recovery_decision(scenario, ctx) {
270        RecoveryDecision::Attempt(plan) => {
271            ctx.attempts.insert(scenario, plan.next_attempt);
272            RecoveryDecision::Attempt(plan)
273        }
274        RecoveryDecision::Escalate {
275            recipe,
276            attempts_made,
277            reason,
278        } => RecoveryDecision::Escalate {
279            recipe,
280            attempts_made,
281            reason,
282        },
283    }
284}
285
#[cfg(test)]
mod tests {
    use super::*;

    /// The context-window recipe allows one attempt and lists its four steps
    /// in a fixed order, both as values and in the rendered summary.
    #[test]
    fn context_window_recipe_matches_expected_local_recovery_flow() {
        let expected_steps = vec![
            RecoveryStep::RefreshRuntimeProfile,
            RecoveryStep::ReducePromptBudget,
            RecoveryStep::CompactHistory,
            RecoveryStep::NarrowRequest,
        ];

        let recipe = recipe_for(RecoveryScenario::ContextWindow);

        assert_eq!(recipe.max_attempts, 1);
        assert_eq!(recipe.steps, expected_steps);
        assert_eq!(
            recipe.steps_summary(),
            "refresh_runtime_profile -> reduce_prompt_budget -> compact_history -> narrow_request"
        );
    }

    /// The first recovery of a scenario is attempted; the second escalates
    /// and reports the single attempt already made.
    #[test]
    fn provider_degraded_attempts_once_then_escalates() {
        let scenario = RecoveryScenario::ProviderDegraded;
        let mut ctx = RecoveryContext::default();

        let first = attempt_recovery(scenario, &mut ctx);
        if let RecoveryDecision::Attempt(plan) = &first {
            assert_eq!(plan.recipe.scenario, scenario);
            assert_eq!(plan.next_attempt, 1);
        } else {
            panic!("expected attempt, got {:?}", first);
        }

        let second = attempt_recovery(scenario, &mut ctx);
        if let RecoveryDecision::Escalate {
            recipe,
            attempts_made,
            reason,
        } = &second
        {
            assert_eq!(recipe.scenario, scenario);
            assert_eq!(*attempts_made, 1);
            assert!(reason.contains("max recovery attempts"));
        } else {
            panic!("expected escalate, got {:?}", second);
        }
    }

    /// The tool-loop recipe stops the repeated pattern before narrowing the request.
    #[test]
    fn tool_loop_recipe_stops_repetition_before_narrowing() {
        let expected_steps = vec![
            RecoveryStep::StopRepeatingToolPattern,
            RecoveryStep::NarrowRequest,
        ];

        let recipe = recipe_for(RecoveryScenario::ToolLoop);

        assert_eq!(recipe.steps, expected_steps);
    }
}