1use rusqlite::{Connection, params};
2use thiserror::Error;
3
4#[derive(Debug, Error)]
5pub enum HealerError {
6 #[error("database error: {0}")]
7 Database(String),
8}
9
10impl From<rusqlite::Error> for HealerError {
11 fn from(e: rusqlite::Error) -> Self {
12 HealerError::Database(e.to_string())
13 }
14}
15
/// Result of one healing run.
///
/// `Clone` and `Eq` are derived in addition to `Debug`/`PartialEq` so callers can
/// keep a copy of an outcome and compare outcomes without restrictions; every
/// field type (`u32`, `String`) supports both.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HealingOutcome {
    /// Healing succeeded.
    Recovered {
        /// 1-based number of the attempt that succeeded.
        attempt: u32,
        /// Name of the adjustment applied on that attempt.
        adjustment: String,
    },
    /// Healing did not succeed and the problem is handed back to the caller.
    Escalated {
        /// Number of attempts reported as used before giving up.
        attempts: u32,
        /// The error text that triggered the healing run.
        last_error: String,
    },
}
21
/// One row read back from the `healing_events` table.
///
/// `Clone`, `PartialEq` and `Eq` are derived alongside `Debug` so history rows can
/// be copied and compared directly (e.g. in assertions); all fields are plain
/// integers or `String`s, so the derives add no constraints.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HealingEvent {
    /// Value of the row's `id` column; history queries order by it ascending.
    pub id: i64,
    /// Pid of the agent the healing run was performed for.
    pub agent_pid: u32,
    /// Timestamp text stored with the row.
    pub timestamp: String,
    /// Error text that triggered the healing run.
    pub original_error: String,
    /// Name of the adjustment recorded for the event.
    pub adjustment: String,
    /// Outcome label stored with the row.
    pub outcome: String,
    /// Attempt number recorded for the event.
    pub attempt_number: u32,
}
32
/// Maps an attempt number to the name of the adjustment used for it.
///
/// Attempt 1 yields `"reduce_context"`, attempt 2 yields `"change_strategy"`, and
/// every other value (including 0) yields `"reset_parameters"`.
fn adjustment_for(attempt: u32) -> &'static str {
    if attempt == 1 {
        "reduce_context"
    } else if attempt == 2 {
        "change_strategy"
    } else {
        "reset_parameters"
    }
}
40
41pub struct SelfHealer {
42 db: Connection,
43 max_retries: u32,
44 always_fail: bool,
45}
46
47impl SelfHealer {
48 pub fn new(db: Connection, max_retries: u32) -> Self {
49 Self { db, max_retries, always_fail: false }
50 }
51
52 pub fn new_with_simulator(db: Connection, max_retries: u32, always_fail: bool) -> Self {
53 Self { db, max_retries, always_fail }
54 }
55
56 fn log_event(&self, agent_pid: u32, original_error: &str, adjustment: &str, outcome: &str, attempt_number: u32) -> Result<(), HealerError> {
57 let ts = chrono::Utc::now().to_rfc3339();
58 self.db.execute(
59 "INSERT INTO healing_events (agent_pid, timestamp, original_error, adjustment, outcome, attempt_number) \
60 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
61 params![agent_pid, ts, original_error, adjustment, outcome, attempt_number],
62 )?;
63 Ok(())
64 }
65
66 pub fn attempt_healing(&self, agent_pid: u32, error: &str) -> Result<HealingOutcome, HealerError> {
67 for attempt in 1..=self.max_retries {
68 let adjustment = adjustment_for(attempt);
69 let succeeded = !self.always_fail && attempt < self.max_retries;
71
72 if succeeded {
73 self.log_event(agent_pid, error, adjustment, "Success", attempt)?;
74 return Ok(HealingOutcome::Recovered { attempt, adjustment: adjustment.to_string() });
75 }
76 }
77
78 let last_adjustment = adjustment_for(self.max_retries);
79 self.log_event(agent_pid, error, last_adjustment, "Failure", self.max_retries)?;
80 Ok(HealingOutcome::Escalated { attempts: self.max_retries, last_error: error.to_string() })
81 }
82
83 pub fn get_history(&self, agent_pid: u32) -> Result<Vec<HealingEvent>, HealerError> {
84 let mut stmt = self.db.prepare(
85 "SELECT id, agent_pid, timestamp, original_error, adjustment, outcome, attempt_number \
86 FROM healing_events WHERE agent_pid = ?1 ORDER BY id ASC",
87 )?;
88 let rows = stmt.query_map(params![agent_pid], |row| {
89 Ok(HealingEvent {
90 id: row.get(0)?,
91 agent_pid: row.get(1)?,
92 timestamp: row.get(2)?,
93 original_error: row.get(3)?,
94 adjustment: row.get(4)?,
95 outcome: row.get(5)?,
96 attempt_number: row.get(6)?,
97 })
98 })?;
99 rows.collect::<Result<Vec<_>, _>>().map_err(HealerError::from)
100 }
101
102 pub fn get_all_history(&self) -> Result<Vec<HealingEvent>, HealerError> {
103 let mut stmt = self.db.prepare(
104 "SELECT id, agent_pid, timestamp, original_error, adjustment, outcome, attempt_number \
105 FROM healing_events ORDER BY id ASC",
106 )?;
107 let rows = stmt.query_map([], |row| {
108 Ok(HealingEvent {
109 id: row.get(0)?,
110 agent_pid: row.get(1)?,
111 timestamp: row.get(2)?,
112 original_error: row.get(3)?,
113 adjustment: row.get(4)?,
114 outcome: row.get(5)?,
115 attempt_number: row.get(6)?,
116 })
117 })?;
118 rows.collect::<Result<Vec<_>, _>>().map_err(HealerError::from)
119 }
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::init_database;
    use tempfile::NamedTempFile;

    /// Healer over a fresh temp-file database with the failure simulator off.
    /// The temp file is returned too so the caller keeps it alive during the test.
    fn make_healer(max_retries: u32) -> (NamedTempFile, SelfHealer) {
        let file = NamedTempFile::new().unwrap();
        let healer = SelfHealer::new(init_database(file.path()).unwrap(), max_retries);
        (file, healer)
    }

    /// Like `make_healer`, but with the simulator forcing every attempt to fail.
    fn make_failing_healer(max_retries: u32) -> (NamedTempFile, SelfHealer) {
        let file = NamedTempFile::new().unwrap();
        let conn = init_database(file.path()).unwrap();
        let healer = SelfHealer::new_with_simulator(conn, max_retries, true);
        (file, healer)
    }

    #[test]
    fn test_successful_healing_on_first_retry() {
        let (_db_file, healer) = make_healer(3);
        match healer.attempt_healing(42, "timeout error").unwrap() {
            HealingOutcome::Recovered { attempt, .. } => assert_eq!(attempt, 1),
            other => panic!("expected Recovered on attempt 1, got {:?}", other),
        }
    }

    #[test]
    fn test_successful_healing_records_in_db() {
        let (_db_file, healer) = make_healer(3);
        healer.attempt_healing(42, "timeout error").unwrap();

        let events = healer.get_history(42).unwrap();
        assert_eq!(events.len(), 1);

        let only = &events[0];
        assert_eq!(only.outcome, "Success");
        assert_eq!(only.original_error, "timeout error");
        assert_eq!(only.attempt_number, 1);
    }

    #[test]
    fn test_retry_limit_enforced_all_fail() {
        let (_db_file, healer) = make_failing_healer(3);
        let result = healer.attempt_healing(7, "crash").unwrap();
        let escalated_after_three = matches!(result, HealingOutcome::Escalated { attempts: 3, .. });
        assert!(escalated_after_three);
    }

    #[test]
    fn test_exhausted_attempts_logs_failure() {
        let (_db_file, healer) = make_failing_healer(3);
        healer.attempt_healing(7, "crash").unwrap();

        let events = healer.get_history(7).unwrap();
        assert_eq!(events.len(), 1);

        let only = &events[0];
        assert_eq!(only.outcome, "Failure");
        assert_eq!(only.attempt_number, 3);
    }

    #[test]
    fn test_escalated_outcome_contains_error() {
        let (_db_file, healer) = make_failing_healer(3);
        let result = healer.attempt_healing(1, "oom").unwrap();
        if let HealingOutcome::Escalated { last_error, .. } = result {
            assert_eq!(last_error, "oom");
        } else {
            panic!("expected Escalated");
        }
    }

    #[test]
    fn test_max_retries_one_always_escalates() {
        let (_db_file, healer) = make_failing_healer(1);
        match healer.attempt_healing(99, "err").unwrap() {
            HealingOutcome::Escalated { attempts, .. } => assert_eq!(attempts, 1),
            other => panic!("expected Escalated after 1 attempt, got {:?}", other),
        }
    }

    #[test]
    fn test_get_history_filters_by_pid() {
        let (_db_file, healer) = make_failing_healer(3);
        healer.attempt_healing(10, "err-a").unwrap();
        healer.attempt_healing(20, "err-b").unwrap();

        // Each pid must see exactly its own single event.
        let expectations = [(10, "err-a"), (20, "err-b")];
        for &(pid, expected_error) in expectations.iter() {
            let events = healer.get_history(pid).unwrap();
            assert_eq!(events.len(), 1);
            assert_eq!(events[0].original_error, expected_error);
        }
    }

    #[test]
    fn test_get_all_history_returns_all() {
        let (_db_file, healer) = make_failing_healer(3);
        healer.attempt_healing(10, "err-a").unwrap();
        healer.attempt_healing(20, "err-b").unwrap();

        let everything = healer.get_all_history().unwrap();
        assert_eq!(everything.len(), 2);
    }

    #[test]
    fn test_adjustment_sequence() {
        let expectations = [
            (1, "reduce_context"),
            (2, "change_strategy"),
            (3, "reset_parameters"),
            (10, "reset_parameters"),
        ];
        for &(attempt, expected) in expectations.iter() {
            assert_eq!(adjustment_for(attempt), expected);
        }
    }

    #[test]
    fn test_healing_event_fields_populated() {
        let (_db_file, healer) = make_healer(3);
        healer.attempt_healing(55, "disk full").unwrap();

        let events = healer.get_history(55).unwrap();
        let first = events.first().expect("one event should have been logged");
        assert_eq!(first.agent_pid, 55);
        assert!(!first.timestamp.is_empty());
        assert_eq!(first.adjustment, "reduce_context");
    }
}