Skip to main content

hematite/agent/
recovery_recipes.rs

1use std::collections::HashMap;
2
/// Situations for which a recovery recipe is defined.
///
/// Every variant is handled by `recipe_for` and is used as the key of the
/// per-scenario attempt counter kept in `RecoveryContext`. Serde serializes the
/// variants with `snake_case` names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RecoveryScenario {
    /// Attempt counter is reset by `RecoveryContext::consume_transient_retry`.
    ProviderDegraded,
    /// Attempt counter is reset by `RecoveryContext::consume_transient_retry`.
    EmptyModelResponse,
    ContextWindow,
    PromptBudgetPressure,
    HistoryPressure,
    McpWorkspaceReadBlocked,
    CurrentPlanScopeBlocked,
    RecentFileEvidenceMissing,
    ExactLineWindowRequired,
    ToolLoop,
    VerificationFailed,
    PolicyCorrection,
}
19
20impl RecoveryScenario {
21    pub fn label(self) -> &'static str {
22        match self {
23            RecoveryScenario::ProviderDegraded => "provider_degraded",
24            RecoveryScenario::EmptyModelResponse => "empty_model_response",
25            RecoveryScenario::ContextWindow => "context_window",
26            RecoveryScenario::PromptBudgetPressure => "prompt_budget_pressure",
27            RecoveryScenario::HistoryPressure => "history_pressure",
28            RecoveryScenario::McpWorkspaceReadBlocked => "mcp_workspace_read_blocked",
29            RecoveryScenario::CurrentPlanScopeBlocked => "current_plan_scope_blocked",
30            RecoveryScenario::RecentFileEvidenceMissing => "recent_file_evidence_missing",
31            RecoveryScenario::ExactLineWindowRequired => "exact_line_window_required",
32            RecoveryScenario::ToolLoop => "tool_loop",
33            RecoveryScenario::VerificationFailed => "verification_failed",
34            RecoveryScenario::PolicyCorrection => "policy_correction",
35        }
36    }
37}
38
/// A single action listed in a `RecoveryRecipe`.
///
/// Which steps belong to which scenario is decided by `recipe_for`. Serde
/// serializes the variants with `snake_case` names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RecoveryStep {
    RetryOnce,
    RefreshRuntimeProfile,
    ReducePromptBudget,
    CompactHistory,
    NarrowRequest,
    UseBuiltinWorkspaceTools,
    StayOnPlannedFiles,
    InspectTargetFile,
    InspectExactLineWindow,
    StopRepeatingToolPattern,
    FixVerificationFailure,
    SelfCorrectToolSelection,
}
55
56impl RecoveryStep {
57    pub fn label(self) -> &'static str {
58        match self {
59            RecoveryStep::RetryOnce => "retry_once",
60            RecoveryStep::RefreshRuntimeProfile => "refresh_runtime_profile",
61            RecoveryStep::ReducePromptBudget => "reduce_prompt_budget",
62            RecoveryStep::CompactHistory => "compact_history",
63            RecoveryStep::NarrowRequest => "narrow_request",
64            RecoveryStep::UseBuiltinWorkspaceTools => "use_builtin_workspace_tools",
65            RecoveryStep::StayOnPlannedFiles => "stay_on_planned_files",
66            RecoveryStep::InspectTargetFile => "inspect_target_file",
67            RecoveryStep::InspectExactLineWindow => "inspect_exact_line_window",
68            RecoveryStep::StopRepeatingToolPattern => "stop_repeating_tool_pattern",
69            RecoveryStep::FixVerificationFailure => "fix_verification_failure",
70            RecoveryStep::SelfCorrectToolSelection => "self_correct_tool_selection",
71        }
72    }
73}
74
/// The recovery steps for one scenario together with its attempt budget.
///
/// Instances are built by `recipe_for`.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct RecoveryRecipe {
    /// Scenario this recipe belongs to.
    pub scenario: RecoveryScenario,
    /// Steps in the order `steps_summary` renders them.
    pub steps: Vec<RecoveryStep>,
    /// Once the recorded attempt count reaches this value,
    /// `preview_recovery_decision` escalates instead of attempting again.
    pub max_attempts: u32,
}
81
82impl RecoveryRecipe {
83    pub fn steps_summary(&self) -> String {
84        let cap = self.steps.iter().map(|s| s.label().len()).sum::<usize>()
85            + self.steps.len().saturating_sub(1) * 4;
86        let mut s = String::with_capacity(cap);
87        for (i, step) in self.steps.iter().enumerate() {
88            if i > 0 {
89                s.push_str(" -> ");
90            }
91            s.push_str(step.label());
92        }
93        s
94    }
95}
96
/// A recipe paired with the number of the attempt about to be made.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecoveryPlan {
    /// Recipe to follow for this attempt.
    pub recipe: RecoveryRecipe,
    /// Attempt number for this plan: the recorded attempt count plus one
    /// (saturating), as computed by `plan_recovery` and
    /// `preview_recovery_decision`.
    pub next_attempt: u32,
}
102
103impl RecoveryPlan {
104    pub fn summary(&self) -> String {
105        format!(
106            "{} [{}/{}]: {}",
107            self.recipe.scenario.label(),
108            self.next_attempt,
109            self.recipe.max_attempts.max(1),
110            self.recipe.steps_summary()
111        )
112    }
113}
114
/// Result of asking whether a scenario may be retried.
///
/// Produced by `preview_recovery_decision` and `attempt_recovery`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RecoveryDecision {
    /// Another attempt is allowed; carries the plan for it.
    Attempt(RecoveryPlan),
    /// The recorded attempt count has reached the recipe's `max_attempts`.
    Escalate {
        /// Recipe whose attempt budget was used up.
        recipe: RecoveryRecipe,
        /// Attempts recorded for the scenario when the decision was made.
        attempts_made: u32,
        /// Human-readable explanation; appended in parentheses by `summary`.
        reason: String,
    },
}
124
125impl RecoveryDecision {
126    pub fn summary(&self) -> String {
127        match self {
128            RecoveryDecision::Attempt(plan) => format!("attempt {}", plan.summary()),
129            RecoveryDecision::Escalate {
130                recipe,
131                attempts_made,
132                reason,
133            } => format!(
134                "escalate {} after {}/{}: {} ({})",
135                recipe.scenario.label(),
136                attempts_made,
137                recipe.max_attempts.max(1),
138                recipe.steps_summary(),
139                reason
140            ),
141        }
142    }
143}
144
/// Mutable bookkeeping for recovery decisions: how many attempts each scenario
/// has used and how many transient provider retries have been consumed.
///
/// `Default` yields an empty context; `clear` resets an existing one to that state.
#[derive(Debug, Clone, Default)]
pub struct RecoveryContext {
    /// Attempts recorded per scenario. A scenario with no entry counts as zero
    /// (see `attempt_count`).
    attempts: HashMap<RecoveryScenario, u32>,
    /// Total transient provider retries consumed this turn across all inference calls.
    transient_retries_this_turn: u32,
}
151
/// Maximum transient provider retries allowed across an entire multi-step turn.
///
/// Enforced by `RecoveryContext::consume_transient_retry`, which returns `false`
/// once this many retries have been consumed.
const MAX_TRANSIENT_RETRIES_PER_TURN: u32 = 3;
154
155impl RecoveryContext {
156    pub fn clear(&mut self) {
157        self.attempts.clear();
158        self.transient_retries_this_turn = 0;
159    }
160
161    pub fn attempt_count(&self, scenario: RecoveryScenario) -> u32 {
162        self.attempts.get(&scenario).copied().unwrap_or(0)
163    }
164
165    /// Returns true and increments the turn-level transient retry budget if a retry
166    /// is still available. Returns false when the budget is exhausted.
167    pub fn consume_transient_retry(&mut self) -> bool {
168        if self.transient_retries_this_turn < MAX_TRANSIENT_RETRIES_PER_TURN {
169            self.transient_retries_this_turn += 1;
170            // Reset the per-scenario counter so attempt_recovery allows the attempt.
171            self.attempts.remove(&RecoveryScenario::ProviderDegraded);
172            self.attempts.remove(&RecoveryScenario::EmptyModelResponse);
173            true
174        } else {
175            false
176        }
177    }
178}
179
180pub fn recipe_for(scenario: RecoveryScenario) -> RecoveryRecipe {
181    match scenario {
182        RecoveryScenario::ProviderDegraded => RecoveryRecipe {
183            scenario,
184            steps: vec![RecoveryStep::RetryOnce],
185            max_attempts: 1,
186        },
187        RecoveryScenario::EmptyModelResponse => RecoveryRecipe {
188            scenario,
189            steps: vec![RecoveryStep::RetryOnce],
190            max_attempts: 1,
191        },
192        RecoveryScenario::ContextWindow => RecoveryRecipe {
193            scenario,
194            steps: vec![
195                RecoveryStep::RefreshRuntimeProfile,
196                RecoveryStep::ReducePromptBudget,
197                RecoveryStep::CompactHistory,
198                RecoveryStep::NarrowRequest,
199            ],
200            max_attempts: 1,
201        },
202        RecoveryScenario::PromptBudgetPressure => RecoveryRecipe {
203            scenario,
204            steps: vec![RecoveryStep::ReducePromptBudget],
205            max_attempts: 1,
206        },
207        RecoveryScenario::HistoryPressure => RecoveryRecipe {
208            scenario,
209            steps: vec![RecoveryStep::CompactHistory],
210            max_attempts: 1,
211        },
212        RecoveryScenario::McpWorkspaceReadBlocked => RecoveryRecipe {
213            scenario,
214            steps: vec![RecoveryStep::UseBuiltinWorkspaceTools],
215            max_attempts: 1,
216        },
217        RecoveryScenario::CurrentPlanScopeBlocked => RecoveryRecipe {
218            scenario,
219            steps: vec![RecoveryStep::StayOnPlannedFiles],
220            max_attempts: 1,
221        },
222        RecoveryScenario::RecentFileEvidenceMissing => RecoveryRecipe {
223            scenario,
224            steps: vec![RecoveryStep::InspectTargetFile],
225            max_attempts: 1,
226        },
227        RecoveryScenario::ExactLineWindowRequired => RecoveryRecipe {
228            scenario,
229            steps: vec![RecoveryStep::InspectExactLineWindow],
230            max_attempts: 1,
231        },
232        RecoveryScenario::ToolLoop => RecoveryRecipe {
233            scenario,
234            steps: vec![
235                RecoveryStep::StopRepeatingToolPattern,
236                RecoveryStep::NarrowRequest,
237            ],
238            max_attempts: 1,
239        },
240        RecoveryScenario::VerificationFailed => RecoveryRecipe {
241            scenario,
242            steps: vec![RecoveryStep::FixVerificationFailure],
243            max_attempts: 1,
244        },
245        RecoveryScenario::PolicyCorrection => RecoveryRecipe {
246            scenario,
247            steps: vec![RecoveryStep::SelfCorrectToolSelection],
248            max_attempts: 1,
249        },
250    }
251}
252
253pub fn plan_recovery(scenario: RecoveryScenario, ctx: &RecoveryContext) -> RecoveryPlan {
254    let recipe = recipe_for(scenario);
255    RecoveryPlan {
256        recipe,
257        next_attempt: ctx.attempt_count(scenario).saturating_add(1),
258    }
259}
260
261pub fn preview_recovery_decision(
262    scenario: RecoveryScenario,
263    ctx: &RecoveryContext,
264) -> RecoveryDecision {
265    let recipe = recipe_for(scenario);
266    let attempts = ctx.attempt_count(scenario);
267    if attempts >= recipe.max_attempts {
268        let max_attempts = recipe.max_attempts.max(1);
269        RecoveryDecision::Escalate {
270            recipe,
271            attempts_made: attempts,
272            reason: format!("max recovery attempts ({}) exhausted", max_attempts),
273        }
274    } else {
275        RecoveryDecision::Attempt(RecoveryPlan {
276            recipe,
277            next_attempt: attempts.saturating_add(1),
278        })
279    }
280}
281
282pub fn attempt_recovery(scenario: RecoveryScenario, ctx: &mut RecoveryContext) -> RecoveryDecision {
283    match preview_recovery_decision(scenario, ctx) {
284        RecoveryDecision::Attempt(plan) => {
285            ctx.attempts.insert(scenario, plan.next_attempt);
286            RecoveryDecision::Attempt(plan)
287        }
288        RecoveryDecision::Escalate {
289            recipe,
290            attempts_made,
291            reason,
292        } => RecoveryDecision::Escalate {
293            recipe,
294            attempts_made,
295            reason,
296        },
297    }
298}
299
#[cfg(test)]
mod tests {
    use super::*;

    /// The context-window recipe keeps its four steps, their order, its
    /// single-attempt budget, and its rendered summary.
    #[test]
    fn context_window_recipe_matches_expected_local_recovery_flow() {
        let expected_steps = [
            RecoveryStep::RefreshRuntimeProfile,
            RecoveryStep::ReducePromptBudget,
            RecoveryStep::CompactHistory,
            RecoveryStep::NarrowRequest,
        ];
        let recipe = recipe_for(RecoveryScenario::ContextWindow);

        assert_eq!(recipe.max_attempts, 1);
        assert_eq!(recipe.steps, expected_steps);
        assert_eq!(
            recipe.steps_summary(),
            "refresh_runtime_profile -> reduce_prompt_budget -> compact_history -> narrow_request"
        );
    }

    /// The first call records an attempt; the second call escalates because
    /// the single-attempt budget is already used.
    #[test]
    fn provider_degraded_attempts_once_then_escalates() {
        let scenario = RecoveryScenario::ProviderDegraded;
        let mut ctx = RecoveryContext::default();

        let first = attempt_recovery(scenario, &mut ctx);
        if let RecoveryDecision::Attempt(plan) = &first {
            assert_eq!(plan.recipe.scenario, scenario);
            assert_eq!(plan.next_attempt, 1);
        } else {
            panic!("expected attempt, got {:?}", first);
        }

        let second = attempt_recovery(scenario, &mut ctx);
        if let RecoveryDecision::Escalate {
            recipe,
            attempts_made,
            reason,
        } = &second
        {
            assert_eq!(recipe.scenario, scenario);
            assert_eq!(*attempts_made, 1);
            assert!(reason.contains("max recovery attempts"));
        } else {
            panic!("expected escalate, got {:?}", second);
        }
    }

    /// The tool-loop recipe lists the stop-repeating step before narrowing.
    #[test]
    fn tool_loop_recipe_stops_repetition_before_narrowing() {
        let expected_steps = [
            RecoveryStep::StopRepeatingToolPattern,
            RecoveryStep::NarrowRequest,
        ];
        assert_eq!(recipe_for(RecoveryScenario::ToolLoop).steps, expected_steps);
    }
}