Skip to main content

libbrat_grite/
reconcile.rs

//! Session reconciliation for crash recovery.
//!
//! This module provides functions to reconcile the expected session state
//! (from Grite) with the actual state (from the engine). It is used during
//! harness startup and periodic health sweeps to detect and recover from
//! crashed sessions.
7
8use std::collections::{HashMap, HashSet};
9
10use crate::types::{Session, SessionStatus};
11use crate::GriteClient;
12use crate::GriteError;
13
14/// Action to take for reconciliation.
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum ReconciliationAction {
17    /// Session exists in both Grit and engine, states match.
18    InSync {
19        session_id: String,
20    },
21
22    /// Session exists in Grit but not in engine - mark as crashed.
23    MarkCrashed {
24        session_id: String,
25        grite_issue_id: String,
26        task_id: String,
27    },
28
29    /// Session exists in engine but not in Grit - orphaned session.
30    /// This is unusual and should be logged as a warning.
31    Orphaned {
32        session_id: String,
33    },
34
35    /// Session status in Grit doesn't match engine reality.
36    UpdateStatus {
37        session_id: String,
38        current_status: SessionStatus,
39        new_status: SessionStatus,
40    },
41}
42
43/// Result of reconciliation for a single repo.
44#[derive(Debug, Default)]
45pub struct ReconciliationResult {
46    /// Actions to take.
47    pub actions: Vec<ReconciliationAction>,
48
49    /// Number of sessions that are in sync.
50    pub in_sync_count: usize,
51
52    /// Number of sessions marked as crashed.
53    pub crashed_count: usize,
54
55    /// Number of orphaned sessions.
56    pub orphaned_count: usize,
57
58    /// Number of status updates needed.
59    pub status_update_count: usize,
60}
61
62impl ReconciliationResult {
63    /// Returns true if all sessions are in sync (no actions needed).
64    pub fn is_clean(&self) -> bool {
65        self.crashed_count == 0 && self.orphaned_count == 0 && self.status_update_count == 0
66    }
67}
68
/// The engine's view of one session, reduced to the fields reconciliation
/// reads.
#[derive(Debug, Clone)]
pub struct EngineSessionInfo {
    /// Identifier of the session; compared against Grite's `session_id`.
    pub session_id: String,

    /// `true` while the session process is still running.
    pub alive: bool,

    /// Exit code of the session process, if it has exited.
    pub exit_code: Option<i32>,
}
81
82/// Reconcile Grite state with engine state.
83///
84/// This compares the sessions recorded in Grit with the sessions running
85/// in the engine and returns a list of actions to bring them in sync.
86///
87/// # Arguments
88///
89/// * `grite_client` - Client for querying Gritee state
90/// * `engine_sessions` - Current sessions from the engine
91///
92/// # Returns
93///
94/// A `ReconciliationResult` with the list of actions to take.
95pub fn reconcile_sessions(
96    grite_client: &GriteClient,
97    engine_sessions: &[EngineSessionInfo],
98) -> Result<ReconciliationResult, GriteError> {
99    let mut result = ReconciliationResult::default();
100
101    // Step 1: Get all active sessions from Grite (not in Exit state)
102    let grite_sessions = grite_client.session_list(None)?;
103    let grite_active: HashMap<String, Session> = grite_sessions
104        .into_iter()
105        .filter(|s| s.status != SessionStatus::Exit)
106        .map(|s| (s.session_id.clone(), s))
107        .collect();
108
109    // Step 2: Build set of engine session IDs (for potential future use)
110    let _engine_ids: HashSet<&str> = engine_sessions
111        .iter()
112        .map(|s| s.session_id.as_str())
113        .collect();
114
115    // Step 3: Check sessions in Grit
116    for (session_id, session) in &grite_active {
117        if let Some(engine_info) = engine_sessions.iter().find(|e| e.session_id == *session_id) {
118            // Session exists in both - check if alive
119            if engine_info.alive {
120                // In sync
121                result.actions.push(ReconciliationAction::InSync {
122                    session_id: session_id.clone(),
123                });
124                result.in_sync_count += 1;
125            } else {
126                // Engine says dead but Grit shows active - mark crashed
127                result.actions.push(ReconciliationAction::MarkCrashed {
128                    session_id: session_id.clone(),
129                    grite_issue_id: session.grite_issue_id.clone(),
130                    task_id: session.task_id.clone(),
131                });
132                result.crashed_count += 1;
133            }
134        } else {
135            // Session in Grit but not in engine - crashed
136            result.actions.push(ReconciliationAction::MarkCrashed {
137                session_id: session_id.clone(),
138                grite_issue_id: session.grite_issue_id.clone(),
139                task_id: session.task_id.clone(),
140            });
141            result.crashed_count += 1;
142        }
143    }
144
145    // Step 4: Check for orphaned sessions (in engine but not in Grit)
146    for engine_info in engine_sessions {
147        if !grite_active.contains_key(&engine_info.session_id) {
148            result.actions.push(ReconciliationAction::Orphaned {
149                session_id: engine_info.session_id.clone(),
150            });
151            result.orphaned_count += 1;
152        }
153    }
154
155    Ok(result)
156}
157
158/// Execute reconciliation actions.
159///
160/// This applies the reconciliation actions to update Grite state.
161///
162/// # Arguments
163///
164/// * `grite_client` - Client for updating Gritee state
165/// * `actions` - Actions to execute
166///
167/// # Returns
168///
169/// Number of successful actions, and any errors encountered.
170pub fn execute_reconciliation(
171    grite_client: &GriteClient,
172    actions: &[ReconciliationAction],
173) -> (usize, Vec<GriteError>) {
174    let mut success_count = 0;
175    let mut errors = Vec::new();
176
177    for action in actions {
178        match action {
179            ReconciliationAction::MarkCrashed { session_id, .. } => {
180                match grite_client.session_exit(session_id, -1, "crash", None) {
181                    Ok(()) => success_count += 1,
182                    Err(e) => errors.push(e),
183                }
184            }
185            ReconciliationAction::UpdateStatus {
186                session_id,
187                new_status,
188                ..
189            } => {
190                // Force update since we're reconciling
191                match grite_client.session_update_status_with_options(session_id, *new_status, true)
192                {
193                    Ok(()) => success_count += 1,
194                    Err(e) => errors.push(e),
195                }
196            }
197            ReconciliationAction::InSync { .. } => {
198                // No action needed
199                success_count += 1;
200            }
201            ReconciliationAction::Orphaned { session_id } => {
202                // Log warning but don't fail - we can't create a Grit record
203                // without task context
204                eprintln!(
205                    "Warning: orphaned session {} found in engine but not in Grit",
206                    session_id
207                );
208                success_count += 1;
209            }
210        }
211    }
212
213    (success_count, errors)
214}
215
#[cfg(test)]
mod tests {
    use super::*;

    /// A default result is clean; a non-zero crashed counter makes it dirty.
    #[test]
    fn test_reconciliation_result_is_clean() {
        assert!(ReconciliationResult::default().is_clean());

        let with_crash = ReconciliationResult {
            crashed_count: 1,
            ..ReconciliationResult::default()
        };
        assert!(!with_crash.is_clean());
    }

    /// `EngineSessionInfo` carries liveness and exit code for both a running
    /// and an exited session.
    #[test]
    fn test_engine_session_info() {
        let running = EngineSessionInfo {
            session_id: String::from("s-20250117-test"),
            alive: true,
            exit_code: None,
        };
        assert!(running.alive);

        let exited = EngineSessionInfo {
            session_id: String::from("s-20250117-dead"),
            alive: false,
            exit_code: Some(1),
        };
        assert!(!exited.alive);
        assert_eq!(exited.exit_code, Some(1));
    }
}