ralph_workflow/checkpoint/execution_history/file_snapshot.rs
/// Default threshold for storing file content in snapshots (10KB).
///
/// Files strictly smaller than this threshold will have their full content
/// stored in the checkpoint for automatic recovery on resume.
///
/// `FileSnapshot::from_workspace_default` passes this value as the `max_size`
/// argument of `FileSnapshot::from_workspace`.
const DEFAULT_CONTENT_THRESHOLD: u64 = 10 * 1024;
6
/// Exclusive upper bound on the size of files that will be compressed in
/// snapshots (100KB).
///
/// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) whose size is at least
/// the content threshold passed to `FileSnapshot::from_workspace`
/// (`DEFAULT_CONTENT_THRESHOLD` by default) and strictly below this value are
/// compressed before storing. Files at or above this size have no content
/// captured at all.
const MAX_COMPRESS_SIZE: u64 = 100 * 1024;
12
/// Snapshot of a file's state at a point in time.
///
/// A snapshot always records the path, checksum, size and existence flag.
/// Content capture is optional: as populated by `FileSnapshot::from_workspace`,
/// at most one of `content` / `compressed_content` is set, and both are `None`
/// when the snapshot was built with `FileSnapshot::new` or
/// `FileSnapshot::not_found`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct FileSnapshot {
    /// Path to the file
    pub path: String,
    /// SHA-256 checksum of file contents (empty string for snapshots created
    /// via `not_found`)
    pub checksum: String,
    /// File size in bytes (0 for snapshots created via `not_found`)
    pub size: u64,
    /// For small files (< 10KB by default), store full content
    pub content: Option<String>,
    /// Compressed content (base64-encoded gzip) for larger key files
    pub compressed_content: Option<String>,
    /// Whether the file existed when the snapshot was taken
    pub exists: bool,
}
29
30impl FileSnapshot {
31 /// Create a new file snapshot with the default content threshold (10KB).
32 ///
33 /// This version does not capture file content (content and `compressed_content` will be None).
34 /// Use `from_workspace` to create a snapshot with content from a workspace.
35 #[must_use]
36 pub fn new(path: &str, checksum: String, size: u64, exists: bool) -> Self {
37 Self {
38 path: path.to_string(),
39 checksum,
40 size,
41 content: None,
42 compressed_content: None,
43 exists,
44 }
45 }
46
47 /// Create a file snapshot from a workspace using the default content threshold (10KB).
48 ///
49 /// Files smaller than 10KB will have their content stored.
50 /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
51 /// are between 10KB and 100KB.
52 pub fn from_workspace_default(
53 workspace: &dyn Workspace,
54 path: &str,
55 checksum: String,
56 size: u64,
57 exists: bool,
58 ) -> Self {
59 Self::from_workspace(
60 workspace,
61 path,
62 checksum,
63 size,
64 exists,
65 DEFAULT_CONTENT_THRESHOLD,
66 )
67 }
68
69 /// Create a file snapshot from a workspace, optionally capturing content.
70 ///
71 /// Files smaller than `max_size` bytes will have their content stored.
72 /// Key files (PROMPT.md, PLAN.md, ISSUES.md, NOTES.md) may be compressed if they
73 /// are between `max_size` and `MAX_COMPRESS_SIZE`.
74 pub fn from_workspace(
75 workspace: &dyn Workspace,
76 path: &str,
77 checksum: String,
78 size: u64,
79 exists: bool,
80 max_size: u64,
81 ) -> Self {
82 let content = if exists && size < max_size {
83 workspace.read(Path::new(path)).ok()
84 } else {
85 None
86 };
87
88 let compressed_content = if exists
89 && (path.contains("PROMPT.md")
90 || path.contains("PLAN.md")
91 || path.contains("ISSUES.md")
92 || path.contains("NOTES.md"))
93 && size >= max_size
94 && size < MAX_COMPRESS_SIZE
95 {
96 workspace.read_bytes(Path::new(path)).ok().and_then(|data| {
97 crate::checkpoint::execution_history::compression::compress(&data).ok()
98 })
99 } else {
100 None
101 };
102
103 Self {
104 path: path.to_string(),
105 checksum,
106 size,
107 content,
108 compressed_content,
109 exists,
110 }
111 }
112
113 /// Get the file content, decompressing if necessary.
114 #[must_use]
115 pub fn get_content(&self) -> Option<String> {
116 self.content.clone().or_else(|| {
117 self.compressed_content.as_ref().and_then(|compressed| {
118 crate::checkpoint::execution_history::compression::decompress(compressed).ok()
119 })
120 })
121 }
122
123 /// Create a snapshot for a non-existent file.
124 #[must_use]
125 pub fn not_found(path: &str) -> Self {
126 Self {
127 path: path.to_string(),
128 checksum: String::new(),
129 size: 0,
130 content: None,
131 compressed_content: None,
132 exists: false,
133 }
134 }
135
136 /// Verify that the current file state matches this snapshot using a workspace.
137 pub fn verify_with_workspace(&self, workspace: &dyn Workspace) -> bool {
138 let path = Path::new(&self.path);
139
140 if !self.exists {
141 return !workspace.exists(path);
142 }
143
144 let Ok(content) = workspace.read_bytes(path) else {
145 return false;
146 };
147
148 if content.len() as u64 != self.size {
149 return false;
150 }
151
152 let checksum = crate::checkpoint::state::calculate_checksum_from_bytes(&content);
153 checksum == self.checksum
154 }
155}