json_archive/atomic_file.rs
1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025 Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Problem: how do you append data to a compressed archive without losing data?
23//!
24//! Gzip and similar formats don't support in-place append. To add one record
25//! to a 20GB archive, you decompress it all, add the record, and recompress.
26//!
27//! You have two options:
28//!
29//! Option A: Overwrite in place. Seek to byte 0 of the existing file and start
30//! writing the new compressed stream. No extra disk space needed. But if you
31//! fail mid-write (out of space, crash, power loss), you've corrupted the
32//! original and lost everything. With a 20GB file, that's a lot of time spent
33//! in the danger zone.
34//!
35//! Option B: Write to a new file, then swap. Requires 2x disk space temporarily,
36//! but the original stays intact until the new file is complete. If writing
37//! fails, you just delete the partial temp file.
38//!
39//! This module implements option B. I'm not comfortable with option A.
40//!
41//! The swap sequence:
42//! 1. Write new archive to `.archive.json.gz.a7bX2q`
43//! 2. Rename original to `.archive.json.gz.a7bX2q.old` (backup)
44//! 3. Rename temp to `archive.json.gz` (atomic on same filesystem)
45//! 4. Delete backup
46//!
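//! If writing fails, original is untouched. If the swap fails, we restore
//! from backup. The only window in which `archive.json.gz` is missing is
//! between steps 2 and 3; a crash there leaves the old data in the `.old`
//! backup and the new data in the temp file, to be renamed by hand.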
49//!
50//! Assumes everything is on one filesystem. Cross-filesystem renames aren't
51//! atomic and we don't handle them.
52
53use std::path::{Path, PathBuf};
54use uuid::Uuid;
55
56use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
57
58/// Generate a rsync-style temporary filename with dot prefix and random suffix
59///
60/// For example: "archive.json.gz" -> ".archive.json.gz.a7bX2q"
61///
62/// The naming convention follows rsync's pattern:
63/// - Prefix with `.` to hide the file on Unix systems
64/// - Append a 6-character random suffix for uniqueness
65pub fn generate_temp_filename<P: AsRef<Path>>(path: P) -> PathBuf {
66 let path = path.as_ref();
67
68 // Generate 6-character random suffix using first 6 hex chars of a uuid
69 let uuid = Uuid::new_v4();
70 let hex = format!("{:x}", uuid.as_u128());
71 let random_suffix = &hex[..6];
72
73 // Get the filename
74 if let Some(filename) = path.file_name() {
75 if let Some(filename_str) = filename.to_str() {
76 // Create new filename: .{original}.{random}
77 let temp_filename = format!(".{}.{}", filename_str, random_suffix);
78
79 // Return path with new filename
80 if let Some(parent) = path.parent() {
81 return parent.join(temp_filename);
82 } else {
83 return PathBuf::from(temp_filename);
84 }
85 }
86 }
87
88 // Fallback: just add prefix and suffix to entire path
89 let mut temp_path = path.to_path_buf();
90 temp_path.set_file_name(format!(".{}.{}", path.display(), random_suffix));
91 temp_path
92}
93
94/// Atomically replace a file using rsync-style temp files
95///
96/// This performs the following sequence:
97/// 1. Write new content to temp_path (caller's responsibility - already done)
98/// 2. Move original_path -> .original_path.{random}.old (backup)
99/// 3. Move temp_path -> original_path (replace)
100/// 4. Delete .original_path.{random}.old (cleanup)
101///
102/// If any step fails, attempts to recover by restoring the backup.
103///
104/// # Arguments
105///
106/// * `original_path` - The file to be replaced
107/// * `temp_path` - The temporary file containing the new content
108///
109/// # Errors
110///
111/// Returns diagnostics if any step of the operation fails. The function
112/// attempts automatic recovery by restoring the backup if the replacement fails.
113pub fn atomic_replace_file<P: AsRef<Path>>(
114 original_path: P,
115 temp_path: P,
116) -> Result<(), Vec<Diagnostic>> {
117 let original = original_path.as_ref();
118 let temp = temp_path.as_ref();
119
120 // Generate backup filename with same random suffix as temp file
121 let backup_path = if let Some(filename) = original.file_name() {
122 if let Some(filename_str) = filename.to_str() {
123 // Extract random suffix from temp filename if it follows our pattern
124 let temp_filename = temp.file_name().and_then(|f| f.to_str()).unwrap_or("");
125 let random_suffix =
126 if temp_filename.starts_with('.') && temp_filename.contains(filename_str) {
127 // Extract suffix after the original filename
128 temp_filename.rsplit('.').next().unwrap_or("backup")
129 } else {
130 "backup"
131 };
132
133 let backup_filename = format!(".{}.{}.old", filename_str, random_suffix);
134 if let Some(parent) = original.parent() {
135 parent.join(backup_filename)
136 } else {
137 PathBuf::from(backup_filename)
138 }
139 } else {
140 original.with_extension("old")
141 }
142 } else {
143 original.with_extension("old")
144 };
145
146 // Step 1: Move original to backup
147 if let Err(e) = std::fs::rename(original, &backup_path) {
148 return Err(vec![Diagnostic::new(
149 DiagnosticLevel::Fatal,
150 DiagnosticCode::PathNotFound,
151 format!("I couldn't create backup of the original archive: {}", e),
152 )
153 .with_advice(
154 "Make sure you have write permission in this directory and sufficient disk space."
155 .to_string(),
156 )]);
157 }
158
159 // Step 2: Move temp to original
160 if let Err(e) = std::fs::rename(temp, original) {
161 // Recovery: Try to restore backup
162 let recovery_error = if std::fs::rename(&backup_path, original).is_ok() {
163 format!(
164 "I couldn't move the new archive into place: {}\nI've restored the original archive from backup.",
165 e
166 )
167 } else {
168 format!(
169 "I couldn't move the new archive into place: {}\nWARNING: I also failed to restore the backup. Your original is at: {}",
170 e,
171 backup_path.display()
172 )
173 };
174
175 return Err(vec![Diagnostic::new(
176 DiagnosticLevel::Fatal,
177 DiagnosticCode::PathNotFound,
178 recovery_error,
179 )
180 .with_advice(
181 "Check filesystem permissions and disk space. If the backup exists, you can manually restore it."
182 .to_string()
183 )]);
184 }
185
186 // Step 3: Delete backup
187 // This is non-critical - if it fails, we just leave the backup around
188 let _ = std::fs::remove_file(&backup_path);
189
190 Ok(())
191}
192
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// The temp name must be exactly `.{original}.{6 hex digits}`.
    #[test]
    fn test_generate_temp_filename() {
        let temp = generate_temp_filename("archive.json.gz");
        let filename = temp.file_name().unwrap().to_str().unwrap();

        // Stripping the full `.{original}.` prefix checks both the leading dot
        // and the separator before the suffix. Counting dots is not enough:
        // `.archive.json.gz` already contains three of them.
        let suffix = filename
            .strip_prefix(".archive.json.gz.")
            .expect("temp name should look like `.{original}.{suffix}`");

        assert_eq!(suffix.len(), 6, "suffix should be 6 characters");
        assert!(
            suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "suffix should be hexadecimal, got {:?}",
            suffix
        );
    }

    /// The temp file must be a sibling of the original so the final rename
    /// stays inside one directory.
    #[test]
    fn test_generate_temp_filename_keeps_parent_directory() {
        let temp = generate_temp_filename("some/dir/archive.json.gz");

        assert_eq!(temp.parent(), Some(std::path::Path::new("some/dir")));

        let filename = temp.file_name().unwrap().to_str().unwrap();
        assert!(filename.starts_with(".archive.json.gz."));
    }

    /// Happy path: new content lands at the original path, and neither the
    /// temp file nor a backup is left behind.
    #[test]
    fn test_atomic_replace_file() -> Result<(), Box<dyn std::error::Error>> {
        // Create original file
        let mut original = NamedTempFile::new()?;
        writeln!(original, "original content")?;
        original.flush()?;
        let original_path = original.path().to_path_buf();

        // Create temp file with new content
        let temp_path = generate_temp_filename(&original_path);
        {
            let mut temp_file = File::create(&temp_path)?;
            writeln!(temp_file, "new content")?;
        }

        // Perform atomic replace
        atomic_replace_file(&original_path, &temp_path)
            .map_err(|e| format!("Failed to replace file: {:?}", e))?;

        // Verify new content
        let content = std::fs::read_to_string(&original_path)?;
        assert_eq!(content.trim(), "new content");

        // Verify temp file is gone
        assert!(!temp_path.exists());

        // Verify backup is cleaned up
        let backup_pattern = format!(".{}.", original_path.file_name().unwrap().to_str().unwrap());
        let parent = original_path.parent().unwrap();
        let backups: Vec<_> = std::fs::read_dir(parent)?
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_str()
                    .map(|s| s.contains(&backup_pattern) && s.ends_with(".old"))
                    .unwrap_or(false)
            })
            .collect();
        assert_eq!(backups.len(), 0, "Backup file should be cleaned up");

        Ok(())
    }

    /// Failure path: when the temp file does not exist, the swap must fail and
    /// the original must be put back with its content intact.
    #[test]
    fn test_atomic_replace_restores_original_when_temp_is_missing(
    ) -> Result<(), Box<dyn std::error::Error>> {
        let mut original = NamedTempFile::new()?;
        writeln!(original, "original content")?;
        original.flush()?;
        let original_path = original.path().to_path_buf();

        // A well-formed temp path that was never created on disk.
        let missing_temp = generate_temp_filename(&original_path);
        assert!(!missing_temp.exists());

        let result = atomic_replace_file(&original_path, &missing_temp);
        assert!(result.is_err(), "replace should fail without a temp file");

        let content = std::fs::read_to_string(&original_path)?;
        assert_eq!(content.trim(), "original content");

        Ok(())
    }
}