// hematite/agent/recovery_recipes.rs

use std::collections::HashMap;
2
3#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
4#[serde(rename_all = "snake_case")]
5pub enum RecoveryScenario {
6    ProviderDegraded,
7    EmptyModelResponse,
8    ContextWindow,
9    PromptBudgetPressure,
10    HistoryPressure,
11    McpWorkspaceReadBlocked,
12    CurrentPlanScopeBlocked,
13    RecentFileEvidenceMissing,
14    ExactLineWindowRequired,
15    ToolLoop,
16    VerificationFailed,
17    PolicyCorrection,
18}
19
20impl RecoveryScenario {
21    pub fn label(self) -> &'static str {
22        match self {
23            RecoveryScenario::ProviderDegraded => "provider_degraded",
24            RecoveryScenario::EmptyModelResponse => "empty_model_response",
25            RecoveryScenario::ContextWindow => "context_window",
26            RecoveryScenario::PromptBudgetPressure => "prompt_budget_pressure",
27            RecoveryScenario::HistoryPressure => "history_pressure",
28            RecoveryScenario::McpWorkspaceReadBlocked => "mcp_workspace_read_blocked",
29            RecoveryScenario::CurrentPlanScopeBlocked => "current_plan_scope_blocked",
30            RecoveryScenario::RecentFileEvidenceMissing => "recent_file_evidence_missing",
31            RecoveryScenario::ExactLineWindowRequired => "exact_line_window_required",
32            RecoveryScenario::ToolLoop => "tool_loop",
33            RecoveryScenario::VerificationFailed => "verification_failed",
34            RecoveryScenario::PolicyCorrection => "policy_correction",
35        }
36    }
37}
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
40#[serde(rename_all = "snake_case")]
41pub enum RecoveryStep {
42    RetryOnce,
43    RefreshRuntimeProfile,
44    ReducePromptBudget,
45    CompactHistory,
46    NarrowRequest,
47    UseBuiltinWorkspaceTools,
48    StayOnPlannedFiles,
49    InspectTargetFile,
50    InspectExactLineWindow,
51    StopRepeatingToolPattern,
52    FixVerificationFailure,
53    SelfCorrectToolSelection,
54}
55
56impl RecoveryStep {
57    pub fn label(self) -> &'static str {
58        match self {
59            RecoveryStep::RetryOnce => "retry_once",
60            RecoveryStep::RefreshRuntimeProfile => "refresh_runtime_profile",
61            RecoveryStep::ReducePromptBudget => "reduce_prompt_budget",
62            RecoveryStep::CompactHistory => "compact_history",
63            RecoveryStep::NarrowRequest => "narrow_request",
64            RecoveryStep::UseBuiltinWorkspaceTools => "use_builtin_workspace_tools",
65            RecoveryStep::StayOnPlannedFiles => "stay_on_planned_files",
66            RecoveryStep::InspectTargetFile => "inspect_target_file",
67            RecoveryStep::InspectExactLineWindow => "inspect_exact_line_window",
68            RecoveryStep::StopRepeatingToolPattern => "stop_repeating_tool_pattern",
69            RecoveryStep::FixVerificationFailure => "fix_verification_failure",
70            RecoveryStep::SelfCorrectToolSelection => "self_correct_tool_selection",
71        }
72    }
73}
74
75#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
76pub struct RecoveryRecipe {
77    pub scenario: RecoveryScenario,
78    pub steps: Vec<RecoveryStep>,
79    pub max_attempts: u32,
80}
81
82impl RecoveryRecipe {
83    pub fn steps_summary(&self) -> String {
84        self.steps
85            .iter()
86            .map(|step| step.label())
87            .collect::<Vec<_>>()
88            .join(" -> ")
89    }
90}
91
92#[derive(Debug, Clone, PartialEq, Eq)]
93pub struct RecoveryPlan {
94    pub recipe: RecoveryRecipe,
95    pub next_attempt: u32,
96}
97
98impl RecoveryPlan {
99    pub fn summary(&self) -> String {
100        format!(
101            "{} [{}/{}]: {}",
102            self.recipe.scenario.label(),
103            self.next_attempt,
104            self.recipe.max_attempts.max(1),
105            self.recipe.steps_summary()
106        )
107    }
108}
109
110#[derive(Debug, Clone, PartialEq, Eq)]
111pub enum RecoveryDecision {
112    Attempt(RecoveryPlan),
113    Escalate {
114        recipe: RecoveryRecipe,
115        attempts_made: u32,
116        reason: String,
117    },
118}
119
120impl RecoveryDecision {
121    pub fn summary(&self) -> String {
122        match self {
123            RecoveryDecision::Attempt(plan) => format!("attempt {}", plan.summary()),
124            RecoveryDecision::Escalate {
125                recipe,
126                attempts_made,
127                reason,
128            } => format!(
129                "escalate {} after {}/{}: {} ({})",
130                recipe.scenario.label(),
131                attempts_made,
132                recipe.max_attempts.max(1),
133                recipe.steps_summary(),
134                reason
135            ),
136        }
137    }
138}
139
140#[derive(Debug, Clone, Default)]
141pub struct RecoveryContext {
142    attempts: HashMap<RecoveryScenario, u32>,
143    /// Total transient provider retries consumed this turn across all inference calls.
144    transient_retries_this_turn: u32,
145}
146
/// Maximum transient provider retries allowed across an entire multi-step turn.
///
/// `RecoveryContext::consume_transient_retry` refuses further retries once
/// this many have been consumed; `RecoveryContext::clear` restores the budget.
const MAX_TRANSIENT_RETRIES_PER_TURN: u32 = 3;
149
150impl RecoveryContext {
151    pub fn clear(&mut self) {
152        self.attempts.clear();
153        self.transient_retries_this_turn = 0;
154    }
155
156    pub fn attempt_count(&self, scenario: RecoveryScenario) -> u32 {
157        self.attempts.get(&scenario).copied().unwrap_or(0)
158    }
159
160    /// Returns true and increments the turn-level transient retry budget if a retry
161    /// is still available. Returns false when the budget is exhausted.
162    pub fn consume_transient_retry(&mut self) -> bool {
163        if self.transient_retries_this_turn < MAX_TRANSIENT_RETRIES_PER_TURN {
164            self.transient_retries_this_turn += 1;
165            // Reset the per-scenario counter so attempt_recovery allows the attempt.
166            self.attempts.remove(&RecoveryScenario::ProviderDegraded);
167            self.attempts.remove(&RecoveryScenario::EmptyModelResponse);
168            true
169        } else {
170            false
171        }
172    }
173}
174
175pub fn recipe_for(scenario: RecoveryScenario) -> RecoveryRecipe {
176    match scenario {
177        RecoveryScenario::ProviderDegraded => RecoveryRecipe {
178            scenario,
179            steps: vec![RecoveryStep::RetryOnce],
180            max_attempts: 1,
181        },
182        RecoveryScenario::EmptyModelResponse => RecoveryRecipe {
183            scenario,
184            steps: vec![RecoveryStep::RetryOnce],
185            max_attempts: 1,
186        },
187        RecoveryScenario::ContextWindow => RecoveryRecipe {
188            scenario,
189            steps: vec![
190                RecoveryStep::RefreshRuntimeProfile,
191                RecoveryStep::ReducePromptBudget,
192                RecoveryStep::CompactHistory,
193                RecoveryStep::NarrowRequest,
194            ],
195            max_attempts: 1,
196        },
197        RecoveryScenario::PromptBudgetPressure => RecoveryRecipe {
198            scenario,
199            steps: vec![RecoveryStep::ReducePromptBudget],
200            max_attempts: 1,
201        },
202        RecoveryScenario::HistoryPressure => RecoveryRecipe {
203            scenario,
204            steps: vec![RecoveryStep::CompactHistory],
205            max_attempts: 1,
206        },
207        RecoveryScenario::McpWorkspaceReadBlocked => RecoveryRecipe {
208            scenario,
209            steps: vec![RecoveryStep::UseBuiltinWorkspaceTools],
210            max_attempts: 1,
211        },
212        RecoveryScenario::CurrentPlanScopeBlocked => RecoveryRecipe {
213            scenario,
214            steps: vec![RecoveryStep::StayOnPlannedFiles],
215            max_attempts: 1,
216        },
217        RecoveryScenario::RecentFileEvidenceMissing => RecoveryRecipe {
218            scenario,
219            steps: vec![RecoveryStep::InspectTargetFile],
220            max_attempts: 1,
221        },
222        RecoveryScenario::ExactLineWindowRequired => RecoveryRecipe {
223            scenario,
224            steps: vec![RecoveryStep::InspectExactLineWindow],
225            max_attempts: 1,
226        },
227        RecoveryScenario::ToolLoop => RecoveryRecipe {
228            scenario,
229            steps: vec![
230                RecoveryStep::StopRepeatingToolPattern,
231                RecoveryStep::NarrowRequest,
232            ],
233            max_attempts: 1,
234        },
235        RecoveryScenario::VerificationFailed => RecoveryRecipe {
236            scenario,
237            steps: vec![RecoveryStep::FixVerificationFailure],
238            max_attempts: 1,
239        },
240        RecoveryScenario::PolicyCorrection => RecoveryRecipe {
241            scenario,
242            steps: vec![RecoveryStep::SelfCorrectToolSelection],
243            max_attempts: 1,
244        },
245    }
246}
247
248pub fn plan_recovery(scenario: RecoveryScenario, ctx: &RecoveryContext) -> RecoveryPlan {
249    let recipe = recipe_for(scenario);
250    RecoveryPlan {
251        recipe,
252        next_attempt: ctx.attempt_count(scenario).saturating_add(1),
253    }
254}
255
256pub fn preview_recovery_decision(
257    scenario: RecoveryScenario,
258    ctx: &RecoveryContext,
259) -> RecoveryDecision {
260    let recipe = recipe_for(scenario);
261    let attempts = ctx.attempt_count(scenario);
262    if attempts >= recipe.max_attempts {
263        let max_attempts = recipe.max_attempts.max(1);
264        RecoveryDecision::Escalate {
265            recipe,
266            attempts_made: attempts,
267            reason: format!("max recovery attempts ({}) exhausted", max_attempts),
268        }
269    } else {
270        RecoveryDecision::Attempt(RecoveryPlan {
271            recipe,
272            next_attempt: attempts.saturating_add(1),
273        })
274    }
275}
276
277pub fn attempt_recovery(scenario: RecoveryScenario, ctx: &mut RecoveryContext) -> RecoveryDecision {
278    match preview_recovery_decision(scenario, ctx) {
279        RecoveryDecision::Attempt(plan) => {
280            ctx.attempts.insert(scenario, plan.next_attempt);
281            RecoveryDecision::Attempt(plan)
282        }
283        RecoveryDecision::Escalate {
284            recipe,
285            attempts_made,
286            reason,
287        } => RecoveryDecision::Escalate {
288            recipe,
289            attempts_made,
290            reason,
291        },
292    }
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// The context-window recipe lists its four steps in a fixed order.
    #[test]
    fn context_window_recipe_matches_expected_local_recovery_flow() {
        let recipe = recipe_for(RecoveryScenario::ContextWindow);
        assert_eq!(recipe.max_attempts, 1);
        assert_eq!(
            recipe.steps,
            vec![
                RecoveryStep::RefreshRuntimeProfile,
                RecoveryStep::ReducePromptBudget,
                RecoveryStep::CompactHistory,
                RecoveryStep::NarrowRequest,
            ]
        );
        assert_eq!(
            recipe.steps_summary(),
            "refresh_runtime_profile -> reduce_prompt_budget -> compact_history -> narrow_request"
        );
    }

    /// With `max_attempts == 1`, the first call attempts and the second escalates.
    #[test]
    fn provider_degraded_attempts_once_then_escalates() {
        let mut ctx = RecoveryContext::default();

        let first = attempt_recovery(RecoveryScenario::ProviderDegraded, &mut ctx);
        match first {
            RecoveryDecision::Attempt(plan) => {
                assert_eq!(plan.recipe.scenario, RecoveryScenario::ProviderDegraded);
                assert_eq!(plan.next_attempt, 1);
            }
            other => panic!("expected attempt, got {:?}", other),
        }

        let second = attempt_recovery(RecoveryScenario::ProviderDegraded, &mut ctx);
        match second {
            RecoveryDecision::Escalate {
                recipe,
                attempts_made,
                reason,
            } => {
                assert_eq!(recipe.scenario, RecoveryScenario::ProviderDegraded);
                assert_eq!(attempts_made, 1);
                assert!(reason.contains("max recovery attempts"));
            }
            other => panic!("expected escalate, got {:?}", other),
        }
    }

    /// The tool-loop recipe stops the repeated pattern before narrowing the request.
    #[test]
    fn tool_loop_recipe_stops_repetition_before_narrowing() {
        let recipe = recipe_for(RecoveryScenario::ToolLoop);
        assert_eq!(
            recipe.steps,
            vec![
                RecoveryStep::StopRepeatingToolPattern,
                RecoveryStep::NarrowRequest,
            ]
        );
    }

    /// Each consumed transient retry clears the provider attempt count, and the
    /// budget refuses further retries once it is used up.
    #[test]
    fn transient_retry_budget_resets_attempts_until_exhausted() {
        let mut ctx = RecoveryContext::default();

        for _ in 0..MAX_TRANSIENT_RETRIES_PER_TURN {
            attempt_recovery(RecoveryScenario::ProviderDegraded, &mut ctx);
            assert_eq!(ctx.attempt_count(RecoveryScenario::ProviderDegraded), 1);
            assert!(ctx.consume_transient_retry());
            assert_eq!(ctx.attempt_count(RecoveryScenario::ProviderDegraded), 0);
        }

        assert!(!ctx.consume_transient_retry());

        // `clear` restores the budget.
        ctx.clear();
        assert!(ctx.consume_transient_retry());
    }
}