// ralph_workflow/checkpoint/execution_history/file_snapshot.rs

/// Default threshold for storing file content in snapshots (10 KiB).
///
/// `FileSnapshot::from_workspace_default` passes this value as the inline
/// limit: files whose recorded size is strictly smaller than it have their
/// full content stored in the checkpoint for automatic recovery on resume.
const DEFAULT_CONTENT_THRESHOLD: u64 = 10 * 1024;

/// Maximum file size that will be compressed in snapshots (100 KiB).
///
/// Key files (paths containing `PROMPT.md`, `PLAN.md`, `ISSUES.md` or
/// `NOTES.md`) that are too large to be stored inline but strictly smaller
/// than this bound are gzip-compressed and base64-encoded before storing.
/// With the default settings the inline limit is `DEFAULT_CONTENT_THRESHOLD`.
const MAX_COMPRESS_SIZE: u64 = 100 * 1024;

13/// Snapshot of a file's state at a point in time.
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
15pub struct FileSnapshot {
16    /// Path to the file
17    pub path: String,
18    /// SHA-256 checksum of file contents
19    pub checksum: String,
20    /// File size in bytes
21    pub size: u64,
22    /// For small files (< 10KB by default), store full content
23    pub content: Option<String>,
24    /// Compressed content (base64-encoded gzip) for larger key files
25    pub compressed_content: Option<String>,
26    /// Whether the file existed
27    pub exists: bool,
28}
29
30impl FileSnapshot {
31    /// Create a new file snapshot with the default content threshold (10KB).
32    ///
33    /// This version does not capture file content (content and `compressed_content` will be None).
34    /// Use `from_workspace` to create a snapshot with content from a workspace.
35    #[must_use]
36    pub fn new(path: &str, checksum: String, size: u64, exists: bool) -> Self {
37        Self {
38            path: path.to_string(),
39            checksum,
40            size,
41            content: None,
42            compressed_content: None,
43            exists,
44        }
45    }
46
47    /// Create a file snapshot from a workspace using the default content threshold (10KB).
48    ///
49    /// Files smaller than 10KB will have their content stored.
50    /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
51    /// are between 10KB and 100KB.
52    pub fn from_workspace_default(
53        workspace: &dyn Workspace,
54        path: &str,
55        checksum: String,
56        size: u64,
57        exists: bool,
58    ) -> Self {
59        Self::from_workspace(
60            workspace,
61            path,
62            checksum,
63            size,
64            exists,
65            DEFAULT_CONTENT_THRESHOLD,
66        )
67    }
68
69    /// Create a file snapshot from a workspace, optionally capturing content.
70    ///
71    /// Files smaller than `max_size` bytes will have their content stored.
72    /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
73    /// are between `max_size` and `MAX_COMPRESS_SIZE`.
74    pub fn from_workspace(
75        workspace: &dyn Workspace,
76        path: &str,
77        checksum: String,
78        size: u64,
79        exists: bool,
80        max_size: u64,
81    ) -> Self {
82        let mut content = None;
83        let mut compressed_content = None;
84
85        if exists {
86            let is_key_file = path.contains("PROMPT.md")
87                || path.contains("PLAN.md")
88                || path.contains("ISSUES.md")
89                || path.contains("NOTES.md");
90
91            let path_ref = Path::new(path);
92
93            if size < max_size {
94                // For small files, read and store content directly
95                content = workspace.read(path_ref).ok();
96            } else if is_key_file && size < MAX_COMPRESS_SIZE {
97                // For larger key files, compress the content
98                if let Ok(data) = workspace.read_bytes(path_ref) {
99                    compressed_content = compress_data(&data).ok();
100                }
101            }
102        }
103
104        Self {
105            path: path.to_string(),
106            checksum,
107            size,
108            content,
109            compressed_content,
110            exists,
111        }
112    }
113
114    /// Get the file content, decompressing if necessary.
115    #[must_use]
116    pub fn get_content(&self) -> Option<String> {
117        self.content.clone().or_else(|| {
118            self.compressed_content
119                .as_ref()
120                .and_then(|compressed| decompress_data(compressed).ok())
121        })
122    }
123
124    /// Create a snapshot for a non-existent file.
125    #[must_use]
126    pub fn not_found(path: &str) -> Self {
127        Self {
128            path: path.to_string(),
129            checksum: String::new(),
130            size: 0,
131            content: None,
132            compressed_content: None,
133            exists: false,
134        }
135    }
136
137    /// Verify that the current file state matches this snapshot using a workspace.
138    pub fn verify_with_workspace(&self, workspace: &dyn Workspace) -> bool {
139        let path = Path::new(&self.path);
140
141        if !self.exists {
142            return !workspace.exists(path);
143        }
144
145        let Ok(content) = workspace.read_bytes(path) else {
146            return false;
147        };
148
149        if content.len() as u64 != self.size {
150            return false;
151        }
152
153        let checksum = crate::checkpoint::state::calculate_checksum_from_bytes(&content);
154        checksum == self.checksum
155    }
156}
157
158/// Compress data using gzip and encode as base64.
159///
160/// This is used to store larger file content in checkpoints without
161/// bloating the checkpoint file size too much.
162fn compress_data(data: &[u8]) -> Result<String, std::io::Error> {
163    use base64::{engine::general_purpose::STANDARD, Engine};
164    use flate2::write::GzEncoder;
165    use flate2::Compression;
166    use std::io::Write;
167
168    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
169    encoder.write_all(data)?;
170    let compressed = encoder.finish()?;
171
172    Ok(STANDARD.encode(&compressed))
173}
174
/// Upper bound on the decompressed size of a snapshot payload (1 MiB).
///
/// `decompress_data` fails with an `InvalidData` error as soon as a payload
/// inflates beyond this many bytes, which bounds the memory used when
/// restoring stored content.
const MAX_DECOMPRESSED_SNAPSHOT_BYTES: usize = 1024 * 1024;

177/// Decompress data that was compressed with `compress_data`.
178fn decompress_data(encoded: &str) -> Result<String, std::io::Error> {
179    use base64::{engine::general_purpose::STANDARD, Engine};
180    use flate2::read::GzDecoder;
181    use std::io::Read;
182
183    let compressed = STANDARD.decode(encoded).map_err(|e| {
184        std::io::Error::new(
185            std::io::ErrorKind::InvalidData,
186            format!("Base64 decode error: {e}"),
187        )
188    })?;
189
190    let mut decoder = GzDecoder::new(compressed.as_slice());
191    let mut decompressed = Vec::new();
192    let mut buf = [0u8; 8 * 1024];
193
194    loop {
195        let n = decoder.read(&mut buf)?;
196        if n == 0 {
197            break;
198        }
199
200        if decompressed.len().saturating_add(n) > MAX_DECOMPRESSED_SNAPSHOT_BYTES {
201            return Err(std::io::Error::new(
202                std::io::ErrorKind::InvalidData,
203                format!(
204                    "Decompressed payload exceeds max size ({MAX_DECOMPRESSED_SNAPSHOT_BYTES} bytes)"
205                ),
206            ));
207        }
208
209        decompressed.extend_from_slice(&buf[..n]);
210    }
211
212    String::from_utf8(decompressed).map_err(|e| {
213        std::io::Error::new(
214            std::io::ErrorKind::InvalidData,
215            format!("UTF-8 decode error: {e}"),
216        )
217    })
218}