chie_core/
checkpoint.rs

1//! Checkpoint-based state recovery system.
2//!
3//! This module provides mechanisms for saving and restoring system state
4//! to enable crash recovery and resilience. Checkpoints can be created
5//! periodically and restored after a failure.
6//!
7//! # Example
8//!
9//! ```rust
10//! use chie_core::checkpoint::{CheckpointManager, CheckpointConfig, Checkpointable};
11//! use std::path::PathBuf;
12//! use serde::{Serialize, Deserialize};
13//!
14//! #[derive(Serialize, Deserialize, Clone)]
15//! struct MyState {
16//!     counter: u64,
17//!     name: String,
18//! }
19//!
20//! impl Checkpointable for MyState {
21//!     fn checkpoint_id(&self) -> String {
22//!         format!("mystate_{}", self.counter)
23//!     }
24//! }
25//!
26//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
27//! let config = CheckpointConfig {
28//!     base_path: PathBuf::from("/tmp/checkpoints"),
29//!     max_checkpoints: 5,
30//!     compression_enabled: true,
31//! };
32//!
33//! let mut manager = CheckpointManager::new(config)?;
34//!
35//! let state = MyState {
36//!     counter: 42,
37//!     name: "test".to_string(),
38//! };
39//!
40//! // Save checkpoint
41//! manager.save_checkpoint(&state)?;
42//!
43//! // Restore latest checkpoint
44//! let restored: MyState = manager.restore_latest()?;
45//! assert_eq!(restored.counter, 42);
46//! # Ok(())
47//! # }
48//! ```
49
50use serde::{Deserialize, Serialize};
51use std::fs::{self, File};
52use std::io::{self, Read, Write};
53use std::path::PathBuf;
54use std::time::{SystemTime, UNIX_EPOCH};
55use thiserror::Error;
56
/// Checkpoint error types.
///
/// Every fallible operation on [`CheckpointManager`] reports its failure
/// through this enum.
#[derive(Debug, Error)]
pub enum CheckpointError {
    /// A filesystem operation failed. Thanks to `#[from]`, an [`io::Error`]
    /// is converted into this variant automatically by the `?` operator.
    #[error("IO error: {0}")]
    Io(#[from] io::Error),

    /// The state or its metadata could not be encoded as JSON; carries the
    /// serializer's error message.
    #[error("Serialization error: {0}")]
    Serialization(String),

    /// The checkpoint data could not be decoded into the requested type;
    /// carries the deserializer's error message.
    #[error("Deserialization error: {0}")]
    Deserialization(String),

    /// A "latest checkpoint" was requested but the manager tracks none.
    #[error("No checkpoints available")]
    NoCheckpointsAvailable,

    /// No `.ckpt` data file exists for the requested id; carries that id.
    #[error("Checkpoint not found: {0}")]
    CheckpointNotFound(String),

    /// Checkpoint data was rejected as invalid.
    // NOTE(review): this variant is not constructed anywhere in the visible
    // part of this file -- confirm whether other modules rely on it.
    #[error("Invalid checkpoint data")]
    InvalidCheckpointData,
}
78
/// Configuration for checkpoint management.
///
/// Passed by value to [`CheckpointManager::new`], which creates `base_path`
/// (including missing parent directories) if it does not exist yet.
#[derive(Debug, Clone)]
pub struct CheckpointConfig {
    /// Base directory for storing checkpoints. Each checkpoint lives directly
    /// inside it as an `<id>.ckpt` data file plus an `<id>.meta` metadata file.
    pub base_path: PathBuf,
    /// Maximum number of checkpoints to retain (oldest are deleted).
    pub max_checkpoints: usize,
    /// Enable compression for checkpoint files.
    // NOTE(review): the save/restore code in this file currently treats
    // compression as a placeholder and stores the JSON bytes unchanged; the
    // flag is only recorded in the checkpoint metadata.
    pub compression_enabled: bool,
}
89
90impl Default for CheckpointConfig {
91    #[inline]
92    fn default() -> Self {
93        Self {
94            base_path: PathBuf::from("./checkpoints"),
95            max_checkpoints: 10,
96            compression_enabled: true,
97        }
98    }
99}
100
/// Trait for objects that can be checkpointed.
///
/// Implementors must be serializable with serde and deserializable into an
/// owned value (`for<'de> Deserialize<'de>`), because a checkpoint is written
/// to disk and later rebuilt from the stored bytes alone.
pub trait Checkpointable: Serialize + for<'de> Deserialize<'de> {
    /// Get a unique identifier for this checkpoint.
    ///
    /// The identifier is used as the file stem of the `<id>.ckpt` and
    /// `<id>.meta` files, so two states returning the same identifier map to
    /// the same files on disk.
    // NOTE(review): the identifier is not validated as a file-name component
    // anywhere in this file -- implementors should avoid path separators.
    fn checkpoint_id(&self) -> String;
}
106
/// Metadata about a checkpoint.
///
/// Stored as pretty-printed JSON in the `<id>.meta` file next to the
/// checkpoint data and reloaded when the checkpoint directory is scanned.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckpointMetadata {
    /// Checkpoint identifier.
    pub id: String,
    /// Timestamp when created (Unix milliseconds).
    pub timestamp_ms: i64,
    /// Size in bytes.
    pub size_bytes: u64,
    /// Whether this checkpoint is compressed.
    // NOTE(review): set from `CheckpointConfig::compression_enabled` at save
    // time; the save path in this file does not actually compress yet.
    pub compressed: bool,
}
119
120impl CheckpointMetadata {
121    /// Get the age of this checkpoint in milliseconds.
122    #[must_use]
123    #[inline]
124    pub fn age_ms(&self) -> i64 {
125        let now = SystemTime::now()
126            .duration_since(UNIX_EPOCH)
127            .unwrap_or_default()
128            .as_millis() as i64;
129        now - self.timestamp_ms
130    }
131}
132
/// Manages checkpoint creation and restoration.
///
/// Owns the configuration plus an in-memory index of the checkpoints found
/// in (or written to) the configured base directory.
pub struct CheckpointManager {
    /// Settings supplied at construction time (directory, retention, compression flag).
    config: CheckpointConfig,
    /// Metadata of the known checkpoints, kept newest first: the directory
    /// scan sorts by descending timestamp and new checkpoints are inserted at
    /// the front.
    checkpoints: Vec<CheckpointMetadata>,
}
138
139impl CheckpointManager {
140    /// Create a new checkpoint manager.
141    pub fn new(config: CheckpointConfig) -> Result<Self, CheckpointError> {
142        // Create base directory if it doesn't exist
143        fs::create_dir_all(&config.base_path)?;
144
145        let mut manager = Self {
146            config,
147            checkpoints: Vec::new(),
148        };
149
150        // Load existing checkpoint metadata
151        manager.scan_checkpoints()?;
152
153        Ok(manager)
154    }
155
156    /// Scan the checkpoint directory and load metadata.
157    fn scan_checkpoints(&mut self) -> Result<(), CheckpointError> {
158        self.checkpoints.clear();
159
160        let entries = fs::read_dir(&self.config.base_path)?;
161
162        for entry in entries.flatten() {
163            let path = entry.path();
164
165            if path.extension().and_then(|s| s.to_str()) == Some("meta") {
166                if let Ok(meta_content) = fs::read_to_string(&path) {
167                    if let Ok(metadata) = serde_json::from_str::<CheckpointMetadata>(&meta_content)
168                    {
169                        self.checkpoints.push(metadata);
170                    }
171                }
172            }
173        }
174
175        // Sort by timestamp (newest first)
176        self.checkpoints
177            .sort_by(|a, b| b.timestamp_ms.cmp(&a.timestamp_ms));
178
179        Ok(())
180    }
181
182    /// Save a checkpoint of the given state.
183    pub fn save_checkpoint<T: Checkpointable>(&mut self, state: &T) -> Result<(), CheckpointError> {
184        let id = state.checkpoint_id();
185        let timestamp_ms = SystemTime::now()
186            .duration_since(UNIX_EPOCH)
187            .unwrap_or_default()
188            .as_millis() as i64;
189
190        // Serialize the state
191        let serialized =
192            serde_json::to_vec(state).map_err(|e| CheckpointError::Serialization(e.to_string()))?;
193
194        let data = if self.config.compression_enabled {
195            // Simple compression placeholder (in production, use flate2 or zstd)
196            serialized
197        } else {
198            serialized
199        };
200
201        // Write checkpoint data
202        let checkpoint_path = self.config.base_path.join(format!("{}.ckpt", id));
203        let mut file = File::create(&checkpoint_path)?;
204        file.write_all(&data)?;
205
206        let size_bytes = data.len() as u64;
207
208        // Write metadata
209        let metadata = CheckpointMetadata {
210            id: id.clone(),
211            timestamp_ms,
212            size_bytes,
213            compressed: self.config.compression_enabled,
214        };
215
216        let meta_path = self.config.base_path.join(format!("{}.meta", id));
217        let meta_json = serde_json::to_string_pretty(&metadata)
218            .map_err(|e| CheckpointError::Serialization(e.to_string()))?;
219        fs::write(meta_path, meta_json)?;
220
221        // Update checkpoint list
222        self.checkpoints.insert(0, metadata);
223
224        // Clean up old checkpoints
225        self.cleanup_old_checkpoints()?;
226
227        Ok(())
228    }
229
230    /// Restore state from the latest checkpoint.
231    pub fn restore_latest<T: Checkpointable>(&self) -> Result<T, CheckpointError> {
232        let metadata = self
233            .checkpoints
234            .first()
235            .ok_or(CheckpointError::NoCheckpointsAvailable)?;
236
237        self.restore_checkpoint(&metadata.id)
238    }
239
240    /// Restore state from a specific checkpoint.
241    pub fn restore_checkpoint<T: Checkpointable>(&self, id: &str) -> Result<T, CheckpointError> {
242        let checkpoint_path = self.config.base_path.join(format!("{}.ckpt", id));
243
244        if !checkpoint_path.exists() {
245            return Err(CheckpointError::CheckpointNotFound(id.to_string()));
246        }
247
248        // Read checkpoint data
249        let mut file = File::open(&checkpoint_path)?;
250        let mut data = Vec::new();
251        file.read_to_end(&mut data)?;
252
253        // Decompress if needed
254        let decompressed = if self.config.compression_enabled {
255            // Simple decompression placeholder
256            data
257        } else {
258            data
259        };
260
261        // Deserialize
262        serde_json::from_slice(&decompressed)
263            .map_err(|e| CheckpointError::Deserialization(e.to_string()))
264    }
265
266    /// Clean up old checkpoints beyond the configured limit.
267    fn cleanup_old_checkpoints(&mut self) -> Result<(), CheckpointError> {
268        while self.checkpoints.len() > self.config.max_checkpoints {
269            if let Some(old_checkpoint) = self.checkpoints.pop() {
270                // Delete checkpoint file
271                let checkpoint_path = self
272                    .config
273                    .base_path
274                    .join(format!("{}.ckpt", old_checkpoint.id));
275                let _ = fs::remove_file(checkpoint_path);
276
277                // Delete metadata file
278                let meta_path = self
279                    .config
280                    .base_path
281                    .join(format!("{}.meta", old_checkpoint.id));
282                let _ = fs::remove_file(meta_path);
283            }
284        }
285
286        Ok(())
287    }
288
289    /// Get metadata for all available checkpoints.
290    #[must_use]
291    #[inline]
292    pub fn list_checkpoints(&self) -> &[CheckpointMetadata] {
293        &self.checkpoints
294    }
295
296    /// Get the number of available checkpoints.
297    #[must_use]
298    #[inline]
299    pub fn checkpoint_count(&self) -> usize {
300        self.checkpoints.len()
301    }
302
303    /// Delete a specific checkpoint.
304    pub fn delete_checkpoint(&mut self, id: &str) -> Result<(), CheckpointError> {
305        // Remove from list
306        self.checkpoints.retain(|ckpt| ckpt.id != id);
307
308        // Delete files
309        let checkpoint_path = self.config.base_path.join(format!("{}.ckpt", id));
310        let meta_path = self.config.base_path.join(format!("{}.meta", id));
311
312        let _ = fs::remove_file(checkpoint_path);
313        let _ = fs::remove_file(meta_path);
314
315        Ok(())
316    }
317
318    /// Delete all checkpoints.
319    pub fn clear_all(&mut self) -> Result<(), CheckpointError> {
320        for checkpoint in &self.checkpoints {
321            let checkpoint_path = self
322                .config
323                .base_path
324                .join(format!("{}.ckpt", checkpoint.id));
325            let meta_path = self
326                .config
327                .base_path
328                .join(format!("{}.meta", checkpoint.id));
329
330            let _ = fs::remove_file(checkpoint_path);
331            let _ = fs::remove_file(meta_path);
332        }
333
334        self.checkpoints.clear();
335
336        Ok(())
337    }
338
339    /// Get total size of all checkpoints in bytes.
340    #[must_use]
341    #[inline]
342    pub fn total_size_bytes(&self) -> u64 {
343        self.checkpoints.iter().map(|c| c.size_bytes).sum()
344    }
345}
346
#[cfg(test)]
mod tests {
    use super::*;
    use serde::{Deserialize, Serialize};
    use tempfile::TempDir;

    /// Minimal state type used to exercise the checkpoint manager.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    struct TestState {
        counter: u64,
        name: String,
        values: Vec<i32>,
    }

    impl Checkpointable for TestState {
        fn checkpoint_id(&self) -> String {
            format!("test_{}", self.counter)
        }
    }

    /// Build a manager rooted in a fresh temporary directory, with
    /// compression disabled and the given retention limit.
    ///
    /// The `TempDir` guard is returned as well so the directory stays alive
    /// for the duration of the test.
    fn manager_with_limit(max_checkpoints: usize) -> (TempDir, CheckpointManager) {
        let temp_dir = TempDir::new().unwrap();

        let config = CheckpointConfig {
            base_path: temp_dir.path().to_path_buf(),
            max_checkpoints,
            compression_enabled: false,
        };

        let manager = CheckpointManager::new(config).unwrap();
        (temp_dir, manager)
    }

    #[test]
    fn test_checkpoint_save_and_restore() {
        let (_temp_dir, mut manager) = manager_with_limit(5);

        let state = TestState {
            counter: 42,
            name: "test".to_string(),
            values: vec![1, 2, 3],
        };

        // Saving registers exactly one checkpoint.
        manager.save_checkpoint(&state).unwrap();
        assert_eq!(manager.checkpoint_count(), 1);

        // Restoring yields an equal state.
        let restored: TestState = manager.restore_latest().unwrap();
        assert_eq!(restored, state);
    }

    #[test]
    fn test_multiple_checkpoints() {
        let (_temp_dir, mut manager) = manager_with_limit(3);

        // Save more checkpoints than the retention limit allows.
        for i in 0..5 {
            let state = TestState {
                counter: i,
                name: format!("state_{}", i),
                values: vec![i as i32],
            };
            manager.save_checkpoint(&state).unwrap();
        }

        // Only the latest 3 survive.
        assert_eq!(manager.checkpoint_count(), 3);

        // The most recent one is state 4.
        let latest: TestState = manager.restore_latest().unwrap();
        assert_eq!(latest.counter, 4);
    }

    #[test]
    fn test_checkpoint_deletion() {
        let (_temp_dir, mut manager) = manager_with_limit(5);

        let state = TestState {
            counter: 100,
            name: "delete_me".to_string(),
            values: vec![],
        };

        manager.save_checkpoint(&state).unwrap();
        assert_eq!(manager.checkpoint_count(), 1);

        // Deleting by id empties the index again.
        let id = state.checkpoint_id();
        manager.delete_checkpoint(&id).unwrap();
        assert_eq!(manager.checkpoint_count(), 0);
    }

    #[test]
    fn test_checkpoint_metadata() {
        let (_temp_dir, mut manager) = manager_with_limit(5);

        let state = TestState {
            counter: 999,
            name: "metadata_test".to_string(),
            values: vec![1, 2, 3, 4, 5],
        };

        manager.save_checkpoint(&state).unwrap();

        let checkpoints = manager.list_checkpoints();
        assert_eq!(checkpoints.len(), 1);
        assert_eq!(checkpoints[0].id, "test_999");
        assert!(checkpoints[0].size_bytes > 0);
        assert!(checkpoints[0].age_ms() >= 0);
    }

    #[test]
    fn test_total_size_calculation() {
        let (_temp_dir, mut manager) = manager_with_limit(5);

        for i in 0..3 {
            let state = TestState {
                counter: i,
                name: format!("state_{}", i),
                values: vec![i as i32; 10],
            };
            manager.save_checkpoint(&state).unwrap();
        }

        let total_size = manager.total_size_bytes();
        assert!(total_size > 0);
    }

    #[test]
    fn test_restore_specific_checkpoint() {
        let (_temp_dir, mut manager) = manager_with_limit(5);

        // Save several checkpoints with distinct ids.
        let states: Vec<TestState> = (0..3)
            .map(|i| TestState {
                counter: i,
                name: format!("state_{}", i),
                values: vec![i as i32],
            })
            .collect();

        for state in &states {
            manager.save_checkpoint(state).unwrap();
        }

        // Restore one in the middle (state 1) by id.
        let restored: TestState = manager.restore_checkpoint("test_1").unwrap();
        assert_eq!(restored.counter, 1);
        assert_eq!(restored.name, "state_1");
    }

    #[test]
    fn test_no_checkpoints_error() {
        let (_temp_dir, manager) = manager_with_limit(5);

        // With nothing saved, restoring the latest checkpoint must fail.
        let result: Result<TestState, _> = manager.restore_latest();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            CheckpointError::NoCheckpointsAvailable
        ));
    }
}
553}