Skip to main content

opendev_runtime/
state_snapshot.rs

1//! Application state snapshot for crash recovery (#97).
2//!
3//! Periodically serializes essential session state to a temp file so that
4//! incomplete sessions can be detected and recovered on the next startup.
5
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use serde::{Deserialize, Serialize};
10use tracing::{debug, warn};
11
12/// Filename suffix for crash-recovery snapshots; `snapshot_path` prepends the session ID and `_`.
13const SNAPSHOT_FILENAME: &str = "session_snapshot.json";
14
15/// Subdirectory under `~/.opendev/data/` where snapshots live (used by `SnapshotPersistence::new`).
16const SNAPSHOT_SUBDIR: &str = "recovery";
17
18/// Essential application state that is persisted for crash recovery.
19#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
20pub struct AppStateSnapshot {
21    /// The active session ID.
22    pub session_id: String,
23
24    /// Number of messages exchanged so far.
25    pub message_count: usize,
26
27    /// Last tool results (tool name -> truncated output), limited to most recent.
28    pub last_tool_results: Vec<ToolResultEntry>,
29
30    /// Timestamp (ms since epoch) when the snapshot was taken.
31    pub snapshot_timestamp_ms: u64,
32
33    /// Whether the session ended cleanly.
34    pub completed: bool,
35
36    /// Project directory associated with the session.
37    pub project_dir: String,
38
39    /// Cumulative cost in USD.
40    pub cost_usd: f64,
41}
42
43/// A single tool result entry stored in the snapshot.
44#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
45pub struct ToolResultEntry {
46    pub tool_name: String,
47    pub call_id: String,
48    /// Truncated output (first N chars).
49    pub output_preview: String,
50    pub success: bool,
51}
52
53impl AppStateSnapshot {
54    /// Create a new snapshot with the given session ID.
55    pub fn new(session_id: impl Into<String>, project_dir: impl Into<String>) -> Self {
56        Self {
57            session_id: session_id.into(),
58            message_count: 0,
59            last_tool_results: Vec::new(),
60            snapshot_timestamp_ms: crate::event_bus::now_ms(),
61            completed: false,
62            project_dir: project_dir.into(),
63            cost_usd: 0.0,
64        }
65    }
66
67    /// Record a tool result. Keeps at most `max_entries` most recent results.
68    pub fn record_tool_result(&mut self, entry: ToolResultEntry, max_entries: usize) {
69        self.last_tool_results.push(entry);
70        if self.last_tool_results.len() > max_entries {
71            let excess = self.last_tool_results.len() - max_entries;
72            self.last_tool_results.drain(..excess);
73        }
74    }
75
76    /// Mark the session as completed (clean exit).
77    pub fn mark_completed(&mut self) {
78        self.completed = true;
79        self.snapshot_timestamp_ms = crate::event_bus::now_ms();
80    }
81}
82
83// ---------------------------------------------------------------------------
84// SnapshotPersistence — read / write to disk
85// ---------------------------------------------------------------------------
86
/// Reads and writes [`AppStateSnapshot`] files inside a single directory.
#[derive(Debug, Clone)]
pub struct SnapshotPersistence {
    /// Directory that holds every snapshot file managed by this instance.
    snapshot_dir: PathBuf,
}
92
93impl SnapshotPersistence {
94    /// Create a persistence manager using the default snapshot directory.
95    pub fn new() -> Self {
96        let snapshot_dir = dirs_next::home_dir()
97            .unwrap_or_else(|| PathBuf::from("/tmp"))
98            .join(".opendev")
99            .join("data")
100            .join(SNAPSHOT_SUBDIR);
101
102        Self { snapshot_dir }
103    }
104
105    /// Create a persistence manager using a custom directory (useful for tests).
106    pub fn with_dir(dir: impl Into<PathBuf>) -> Self {
107        Self {
108            snapshot_dir: dir.into(),
109        }
110    }
111
112    /// Return the path where a session's snapshot would be stored.
113    pub fn snapshot_path(&self, session_id: &str) -> PathBuf {
114        self.snapshot_dir
115            .join(format!("{session_id}_{SNAPSHOT_FILENAME}"))
116    }
117
118    /// Save a snapshot to disk atomically (write tmp then rename).
119    pub fn save(&self, snapshot: &AppStateSnapshot) -> Result<(), String> {
120        std::fs::create_dir_all(&self.snapshot_dir)
121            .map_err(|e| format!("Failed to create snapshot dir: {e}"))?;
122
123        let path = self.snapshot_path(&snapshot.session_id);
124        let tmp_path = path.with_extension("json.tmp");
125
126        let json = serde_json::to_string_pretty(snapshot)
127            .map_err(|e| format!("Failed to serialize snapshot: {e}"))?;
128
129        std::fs::write(&tmp_path, &json)
130            .map_err(|e| format!("Failed to write snapshot tmp: {e}"))?;
131
132        std::fs::rename(&tmp_path, &path).map_err(|e| format!("Failed to rename snapshot: {e}"))?;
133
134        debug!("Saved snapshot for session {}", snapshot.session_id);
135        Ok(())
136    }
137
138    /// Load a snapshot for a specific session.
139    pub fn load(&self, session_id: &str) -> Option<AppStateSnapshot> {
140        let path = self.snapshot_path(session_id);
141        self.load_from_path(&path)
142    }
143
144    /// Load a snapshot from a specific path.
145    fn load_from_path(&self, path: &Path) -> Option<AppStateSnapshot> {
146        let contents = std::fs::read_to_string(path).ok()?;
147        serde_json::from_str(&contents).ok()
148    }
149
150    /// Find all incomplete (non-completed) session snapshots.
151    ///
152    /// Returns snapshots where `completed == false`, sorted by timestamp
153    /// (most recent first).
154    pub fn find_incomplete_sessions(&self) -> Vec<AppStateSnapshot> {
155        let mut snapshots = Vec::new();
156
157        let entries = match std::fs::read_dir(&self.snapshot_dir) {
158            Ok(e) => e,
159            Err(_) => return snapshots,
160        };
161
162        for entry in entries.flatten() {
163            let path = entry.path();
164            if path.extension().is_some_and(|e| e == "json")
165                && let Some(snapshot) = self.load_from_path(&path)
166                && !snapshot.completed
167            {
168                snapshots.push(snapshot);
169            }
170        }
171
172        snapshots.sort_by(|a, b| b.snapshot_timestamp_ms.cmp(&a.snapshot_timestamp_ms));
173        snapshots
174    }
175
176    /// Remove the snapshot file for a session (e.g., after clean exit or recovery).
177    pub fn remove(&self, session_id: &str) -> bool {
178        let path = self.snapshot_path(session_id);
179        match std::fs::remove_file(&path) {
180            Ok(()) => {
181                debug!("Removed snapshot for session {session_id}");
182                true
183            }
184            Err(e) => {
185                if e.kind() != std::io::ErrorKind::NotFound {
186                    warn!("Failed to remove snapshot {}: {e}", path.display());
187                }
188                false
189            }
190        }
191    }
192
193    /// Remove snapshots older than `max_age`.
194    pub fn cleanup_old(&self, max_age: Duration) -> usize {
195        let cutoff_ms = crate::event_bus::now_ms().saturating_sub(max_age.as_millis() as u64);
196        let mut removed = 0;
197
198        let entries = match std::fs::read_dir(&self.snapshot_dir) {
199            Ok(e) => e,
200            Err(_) => return 0,
201        };
202
203        for entry in entries.flatten() {
204            let path = entry.path();
205            if path.extension().is_some_and(|e| e == "json")
206                && let Some(snapshot) = self.load_from_path(&path)
207                && snapshot.snapshot_timestamp_ms < cutoff_ms
208                && std::fs::remove_file(&path).is_ok()
209            {
210                removed += 1;
211            }
212        }
213
214        removed
215    }
216}
217
218impl Default for SnapshotPersistence {
219    fn default() -> Self {
220        Self::new()
221    }
222}
223
224#[cfg(test)]
225#[path = "state_snapshot_tests.rs"]
226mod tests;