json_archive/atomic_file.rs
1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025 Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Problem: how do you append data to a compressed archive without losing data?
23//!
24//! Gzip and similar formats don't support in-place append. To add one record
25//! to a 20GB archive, you decompress it all, add the record, and recompress.
26//!
27//! You have two options:
28//!
29//! Option A: Overwrite in place. Seek to byte 0 of the existing file and start
30//! writing the new compressed stream. No extra disk space needed. But if you
31//! fail mid-write (out of space, crash, power loss), you've corrupted the
32//! original and lost everything. With a 20GB file, that's a lot of time spent
33//! in the danger zone.
34//!
35//! Option B: Write to a new file, then swap. Requires 2x disk space temporarily,
36//! but the original stays intact until the new file is complete. If writing
37//! fails, you just delete the partial temp file.
38//!
39//! This module implements option B. I'm not comfortable with option A.
40//!
41//! The swap sequence:
42//! 1. Write new archive to `.archive.json.gz.a7bX2q`
43//! 2. Rename original to `.archive.json.gz.a7bX2q.old` (backup)
44//! 3. Rename temp to `archive.json.gz` (atomic on same filesystem)
45//! 4. Delete backup
46//!
47//! If writing fails, original is untouched. If the swap fails, we restore
48//! from backup. Data loss requires a kernel crash between steps 2 and 3.
49//!
50//! Assumes everything is on one filesystem. Cross-filesystem renames aren't
51//! atomic and we don't handle them.
52
53use std::path::{Path, PathBuf};
54use uuid::Uuid;
55
56use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
57
58/// Generate a rsync-style temporary filename with dot prefix and random suffix
59///
60/// For example: "archive.json.gz" -> ".archive.json.gz.a7bX2q"
61///
62/// The naming convention follows rsync's pattern:
63/// - Prefix with `.` to hide the file on Unix systems
64/// - Append a 6-character random suffix for uniqueness
65pub fn generate_temp_filename<P: AsRef<Path>>(path: P) -> PathBuf {
66 let path = path.as_ref();
67
68 // Generate 6-character random suffix using first 6 hex chars of a uuid
69 let uuid = Uuid::new_v4();
70 let hex = format!("{:x}", uuid.as_u128());
71 let random_suffix = &hex[..6];
72
73 // Get the filename
74 if let Some(filename) = path.file_name() {
75 if let Some(filename_str) = filename.to_str() {
76 // Create new filename: .{original}.{random}
77 let temp_filename = format!(".{}.{}", filename_str, random_suffix);
78
79 // Return path with new filename
80 if let Some(parent) = path.parent() {
81 return parent.join(temp_filename);
82 } else {
83 return PathBuf::from(temp_filename);
84 }
85 }
86 }
87
88 // Fallback: just add prefix and suffix to entire path
89 let mut temp_path = path.to_path_buf();
90 temp_path.set_file_name(format!(".{}.{}", path.display(), random_suffix));
91 temp_path
92}
93
94/// Atomically replace a file using rsync-style temp files
95///
96/// This performs the following sequence:
97/// 1. Write new content to temp_path (caller's responsibility - already done)
98/// 2. Move original_path -> .original_path.{random}.old (backup)
99/// 3. Move temp_path -> original_path (replace)
100/// 4. Delete .original_path.{random}.old (cleanup)
101///
102/// If any step fails, attempts to recover by restoring the backup.
103///
104/// # Arguments
105///
106/// * `original_path` - The file to be replaced
107/// * `temp_path` - The temporary file containing the new content
108///
109/// # Errors
110///
111/// Returns diagnostics if any step of the operation fails. The function
112/// attempts automatic recovery by restoring the backup if the replacement fails.
113pub fn atomic_replace_file<P: AsRef<Path>>(original_path: P, temp_path: P) -> Result<(), Vec<Diagnostic>> {
114 let original = original_path.as_ref();
115 let temp = temp_path.as_ref();
116
117 // Generate backup filename with same random suffix as temp file
118 let backup_path = if let Some(filename) = original.file_name() {
119 if let Some(filename_str) = filename.to_str() {
120 // Extract random suffix from temp filename if it follows our pattern
121 let temp_filename = temp.file_name().and_then(|f| f.to_str()).unwrap_or("");
122 let random_suffix = if temp_filename.starts_with('.') && temp_filename.contains(filename_str) {
123 // Extract suffix after the original filename
124 temp_filename.rsplit('.').next().unwrap_or("backup")
125 } else {
126 "backup"
127 };
128
129 let backup_filename = format!(".{}.{}.old", filename_str, random_suffix);
130 if let Some(parent) = original.parent() {
131 parent.join(backup_filename)
132 } else {
133 PathBuf::from(backup_filename)
134 }
135 } else {
136 original.with_extension("old")
137 }
138 } else {
139 original.with_extension("old")
140 };
141
142 // Step 1: Move original to backup
143 if let Err(e) = std::fs::rename(original, &backup_path) {
144 return Err(vec![Diagnostic::new(
145 DiagnosticLevel::Fatal,
146 DiagnosticCode::PathNotFound,
147 format!("I couldn't create backup of the original archive: {}", e),
148 )
149 .with_advice(
150 "Make sure you have write permission in this directory and sufficient disk space."
151 .to_string()
152 )]);
153 }
154
155 // Step 2: Move temp to original
156 if let Err(e) = std::fs::rename(temp, original) {
157 // Recovery: Try to restore backup
158 let recovery_error = if std::fs::rename(&backup_path, original).is_ok() {
159 format!(
160 "I couldn't move the new archive into place: {}\nI've restored the original archive from backup.",
161 e
162 )
163 } else {
164 format!(
165 "I couldn't move the new archive into place: {}\nWARNING: I also failed to restore the backup. Your original is at: {}",
166 e,
167 backup_path.display()
168 )
169 };
170
171 return Err(vec![Diagnostic::new(
172 DiagnosticLevel::Fatal,
173 DiagnosticCode::PathNotFound,
174 recovery_error,
175 )
176 .with_advice(
177 "Check filesystem permissions and disk space. If the backup exists, you can manually restore it."
178 .to_string()
179 )]);
180 }
181
182 // Step 3: Delete backup
183 // This is non-critical - if it fails, we just leave the backup around
184 let _ = std::fs::remove_file(&backup_path);
185
186 Ok(())
187}
188
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// The temp name must be exactly `.{original}.{suffix}` with a
    /// 6-character hex suffix, and must stay in the original's directory.
    #[test]
    fn test_generate_temp_filename() {
        let temp = generate_temp_filename("archive.json.gz");
        let filename = temp.file_name().unwrap().to_str().unwrap();

        // Dot prefix, then the original name, then a dot before the suffix.
        let suffix = filename
            .strip_prefix(".archive.json.gz.")
            .expect("temp name should be the dot-prefixed original name followed by '.'");

        // The random suffix is six hex digits.
        assert_eq!(suffix.len(), 6, "unexpected suffix: {:?}", suffix);
        assert!(
            suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "suffix should be hex: {:?}",
            suffix
        );

        // The temp file is a sibling of the original, so the later rename
        // stays inside one directory.
        let nested = generate_temp_filename("some/dir/archive.json.gz");
        assert_eq!(nested.parent(), Some(Path::new("some/dir")));
    }

    /// Replacing a file swaps in the new content and leaves neither the temp
    /// file nor a `.old` backup behind.
    #[test]
    fn test_atomic_replace_file() -> Result<(), Box<dyn std::error::Error>> {
        // Create original file
        let mut original = NamedTempFile::new()?;
        writeln!(original, "original content")?;
        original.flush()?;
        let original_path = original.path().to_path_buf();

        // Create temp file with new content; the inner scope closes the
        // handle before the rename.
        let temp_path = generate_temp_filename(&original_path);
        {
            let mut temp_file = File::create(&temp_path)?;
            writeln!(temp_file, "new content")?;
        }

        // Perform atomic replace
        atomic_replace_file(&original_path, &temp_path)
            .map_err(|e| format!("Failed to replace file: {:?}", e))?;

        // Verify new content
        let content = std::fs::read_to_string(&original_path)?;
        assert_eq!(content.trim(), "new content");

        // Verify temp file is gone
        assert!(!temp_path.exists());

        // Verify backup is cleaned up: no `.{name}.*.old` entry may remain
        // in the directory.
        let backup_pattern = format!(".{}.", original_path.file_name().unwrap().to_str().unwrap());
        let parent = original_path.parent().unwrap();
        let backups: Vec<_> = std::fs::read_dir(parent)?
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_str()
                    .map(|s| s.contains(&backup_pattern) && s.ends_with(".old"))
                    .unwrap_or(false)
            })
            .collect();
        assert_eq!(backups.len(), 0, "Backup file should be cleaned up");

        Ok(())
    }
}
253}