Skip to main content

openhawk_core/
self_healer.rs

1use rusqlite::{Connection, params};
2use thiserror::Error;
3
4#[derive(Debug, Error)]
5pub enum HealerError {
6    #[error("database error: {0}")]
7    Database(String),
8}
9
10impl From<rusqlite::Error> for HealerError {
11    fn from(e: rusqlite::Error) -> Self {
12        HealerError::Database(e.to_string())
13    }
14}
15
/// Final result of one healing run.
///
/// Derives `Clone` and `Eq` in addition to `Debug`/`PartialEq` so outcomes can
/// be stored, duplicated and compared without destructuring.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HealingOutcome {
    /// Healing succeeded.
    Recovered {
        /// 1-based number of the attempt that succeeded.
        attempt: u32,
        /// Name of the adjustment applied on that attempt.
        adjustment: String,
    },
    /// Every permitted attempt failed and the problem is handed back to the caller.
    Escalated {
        /// Number of attempts that were allowed before giving up.
        attempts: u32,
        /// Text of the error that healing was asked to recover from.
        last_error: String,
    },
}
21
/// One row of the `healing_events` table.
///
/// Derives `Clone`, `PartialEq` and `Eq` so history entries can be duplicated
/// and compared as whole values instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HealingEvent {
    /// Row id as stored in the table.
    pub id: i64,
    /// Pid of the agent the healing run was performed for.
    pub agent_pid: u32,
    /// Timestamp text as stored in the table.
    pub timestamp: String,
    /// Error text that triggered the healing run.
    pub original_error: String,
    /// Name of the adjustment recorded for this event.
    pub adjustment: String,
    /// Outcome label as stored in the table (e.g. "Success" or "Failure").
    pub outcome: String,
    /// Attempt number recorded for this event.
    pub attempt_number: u32,
}
32
/// Maps an attempt number to the name of the adjustment tried on that attempt.
///
/// Attempt 1 yields `"reduce_context"`, attempt 2 yields `"change_strategy"`,
/// and every other value (including 0) yields `"reset_parameters"`.
fn adjustment_for(attempt: u32) -> &'static str {
    if attempt == 1 {
        return "reduce_context";
    }
    if attempt == 2 {
        return "change_strategy";
    }
    // Anything that is not one of the first two attempts uses the last resort.
    "reset_parameters"
}
40
41pub struct SelfHealer {
42    db: Connection,
43    max_retries: u32,
44    always_fail: bool,
45}
46
47impl SelfHealer {
48    pub fn new(db: Connection, max_retries: u32) -> Self {
49        Self { db, max_retries, always_fail: false }
50    }
51
52    pub fn new_with_simulator(db: Connection, max_retries: u32, always_fail: bool) -> Self {
53        Self { db, max_retries, always_fail }
54    }
55
56    fn log_event(&self, agent_pid: u32, original_error: &str, adjustment: &str, outcome: &str, attempt_number: u32) -> Result<(), HealerError> {
57        let ts = chrono::Utc::now().to_rfc3339();
58        self.db.execute(
59            "INSERT INTO healing_events (agent_pid, timestamp, original_error, adjustment, outcome, attempt_number) \
60             VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
61            params![agent_pid, ts, original_error, adjustment, outcome, attempt_number],
62        )?;
63        Ok(())
64    }
65
66    pub fn attempt_healing(&self, agent_pid: u32, error: &str) -> Result<HealingOutcome, HealerError> {
67        for attempt in 1..=self.max_retries {
68            let adjustment = adjustment_for(attempt);
69            // Simulate: succeed on first attempt unless always_fail, or if this is the last attempt
70            let succeeded = !self.always_fail && attempt < self.max_retries;
71
72            if succeeded {
73                self.log_event(agent_pid, error, adjustment, "Success", attempt)?;
74                return Ok(HealingOutcome::Recovered { attempt, adjustment: adjustment.to_string() });
75            }
76        }
77
78        let last_adjustment = adjustment_for(self.max_retries);
79        self.log_event(agent_pid, error, last_adjustment, "Failure", self.max_retries)?;
80        Ok(HealingOutcome::Escalated { attempts: self.max_retries, last_error: error.to_string() })
81    }
82
83    pub fn get_history(&self, agent_pid: u32) -> Result<Vec<HealingEvent>, HealerError> {
84        let mut stmt = self.db.prepare(
85            "SELECT id, agent_pid, timestamp, original_error, adjustment, outcome, attempt_number \
86             FROM healing_events WHERE agent_pid = ?1 ORDER BY id ASC",
87        )?;
88        let rows = stmt.query_map(params![agent_pid], |row| {
89            Ok(HealingEvent {
90                id: row.get(0)?,
91                agent_pid: row.get(1)?,
92                timestamp: row.get(2)?,
93                original_error: row.get(3)?,
94                adjustment: row.get(4)?,
95                outcome: row.get(5)?,
96                attempt_number: row.get(6)?,
97            })
98        })?;
99        rows.collect::<Result<Vec<_>, _>>().map_err(HealerError::from)
100    }
101
102    pub fn get_all_history(&self) -> Result<Vec<HealingEvent>, HealerError> {
103        let mut stmt = self.db.prepare(
104            "SELECT id, agent_pid, timestamp, original_error, adjustment, outcome, attempt_number \
105             FROM healing_events ORDER BY id ASC",
106        )?;
107        let rows = stmt.query_map([], |row| {
108            Ok(HealingEvent {
109                id: row.get(0)?,
110                agent_pid: row.get(1)?,
111                timestamp: row.get(2)?,
112                original_error: row.get(3)?,
113                adjustment: row.get(4)?,
114                outcome: row.get(5)?,
115                attempt_number: row.get(6)?,
116            })
117        })?;
118        rows.collect::<Result<Vec<_>, _>>().map_err(HealerError::from)
119    }
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::init_database;
    use tempfile::NamedTempFile;

    /// Builds a healer with forced failure disabled over a fresh temp-file database.
    ///
    /// The temp file handle is returned so the caller keeps it for the whole test.
    fn make_healer(max_retries: u32) -> (NamedTempFile, SelfHealer) {
        let db_file = NamedTempFile::new().unwrap();
        let healer = SelfHealer::new(init_database(db_file.path()).unwrap(), max_retries);
        (db_file, healer)
    }

    /// Same as `make_healer`, but the simulation is forced to fail every attempt.
    fn make_failing_healer(max_retries: u32) -> (NamedTempFile, SelfHealer) {
        let db_file = NamedTempFile::new().unwrap();
        let connection = init_database(db_file.path()).unwrap();
        let healer = SelfHealer::new_with_simulator(connection, max_retries, true);
        (db_file, healer)
    }

    #[test]
    fn test_successful_healing_on_first_retry() {
        let (_db_file, healer) = make_healer(3);
        let result = healer.attempt_healing(42, "timeout error").unwrap();
        assert!(matches!(result, HealingOutcome::Recovered { attempt: 1, .. }));
    }

    #[test]
    fn test_successful_healing_records_in_db() {
        let (_db_file, healer) = make_healer(3);
        healer.attempt_healing(42, "timeout error").unwrap();

        let events = healer.get_history(42).unwrap();
        assert_eq!(events.len(), 1);

        let recorded = &events[0];
        assert_eq!(recorded.outcome, "Success");
        assert_eq!(recorded.original_error, "timeout error");
        assert_eq!(recorded.attempt_number, 1);
    }

    #[test]
    fn test_retry_limit_enforced_all_fail() {
        let (_db_file, healer) = make_failing_healer(3);
        let result = healer.attempt_healing(7, "crash").unwrap();
        assert!(matches!(result, HealingOutcome::Escalated { attempts: 3, .. }));
    }

    #[test]
    fn test_exhausted_attempts_logs_failure() {
        let (_db_file, healer) = make_failing_healer(3);
        healer.attempt_healing(7, "crash").unwrap();

        let events = healer.get_history(7).unwrap();
        assert_eq!(events.len(), 1);

        let recorded = &events[0];
        assert_eq!(recorded.outcome, "Failure");
        assert_eq!(recorded.attempt_number, 3);
    }

    #[test]
    fn test_escalated_outcome_contains_error() {
        let (_db_file, healer) = make_failing_healer(3);
        let result = healer.attempt_healing(1, "oom").unwrap();
        if let HealingOutcome::Escalated { last_error, .. } = result {
            assert_eq!(last_error, "oom");
        } else {
            panic!("expected Escalated");
        }
    }

    #[test]
    fn test_max_retries_one_always_escalates() {
        let (_db_file, healer) = make_failing_healer(1);
        let result = healer.attempt_healing(99, "err").unwrap();
        assert!(matches!(result, HealingOutcome::Escalated { attempts: 1, .. }));
    }

    #[test]
    fn test_get_history_filters_by_pid() {
        let (_db_file, healer) = make_failing_healer(3);
        healer.attempt_healing(10, "err-a").unwrap();
        healer.attempt_healing(20, "err-b").unwrap();

        let for_pid_10 = healer.get_history(10).unwrap();
        let for_pid_20 = healer.get_history(20).unwrap();

        assert_eq!(for_pid_10.len(), 1);
        assert_eq!(for_pid_20.len(), 1);
        assert_eq!(for_pid_10[0].original_error, "err-a");
        assert_eq!(for_pid_20[0].original_error, "err-b");
    }

    #[test]
    fn test_get_all_history_returns_all() {
        let (_db_file, healer) = make_failing_healer(3);
        healer.attempt_healing(10, "err-a").unwrap();
        healer.attempt_healing(20, "err-b").unwrap();

        let everything = healer.get_all_history().unwrap();
        assert_eq!(everything.len(), 2);
    }

    #[test]
    fn test_adjustment_sequence() {
        let expectations = [
            (1u32, "reduce_context"),
            (2, "change_strategy"),
            (3, "reset_parameters"),
            (10, "reset_parameters"),
        ];
        for &(attempt, expected) in expectations.iter() {
            assert_eq!(adjustment_for(attempt), expected);
        }
    }

    #[test]
    fn test_healing_event_fields_populated() {
        let (_db_file, healer) = make_healer(3);
        healer.attempt_healing(55, "disk full").unwrap();

        let events = healer.get_history(55).unwrap();
        let first = &events[0];
        assert_eq!(first.agent_pid, 55);
        assert!(!first.timestamp.is_empty());
        assert_eq!(first.adjustment, "reduce_context");
    }
}