json_archive/
atomic_file.rs

1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025  Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Problem: how do you append data to a compressed archive without losing data?
23//!
24//! Gzip and similar formats don't support in-place append. To add one record
25//! to a 20GB archive, you decompress it all, add the record, and recompress.
26//!
27//! You have two options:
28//!
29//! Option A: Overwrite in place. Seek to byte 0 of the existing file and start
30//! writing the new compressed stream. No extra disk space needed. But if you
31//! fail mid-write (out of space, crash, power loss), you've corrupted the
32//! original and lost everything. With a 20GB file, that's a lot of time spent
33//! in the danger zone.
34//!
35//! Option B: Write to a new file, then swap. Requires 2x disk space temporarily,
36//! but the original stays intact until the new file is complete. If writing
37//! fails, you just delete the partial temp file.
38//!
39//! This module implements option B. I'm not comfortable with option A.
40//!
41//! The swap sequence:
42//! 1. Write new archive to `.archive.json.gz.a7bX2q`
43//! 2. Rename original to `.archive.json.gz.a7bX2q.old` (backup)
44//! 3. Rename temp to `archive.json.gz` (atomic on same filesystem)
45//! 4. Delete backup
46//!
47//! If writing fails, original is untouched. If the swap fails, we restore
48//! from backup. Data loss requires a kernel crash between steps 2 and 3.
49//!
50//! Assumes everything is on one filesystem. Cross-filesystem renames aren't
51//! atomic and we don't handle them.
52
53use std::path::{Path, PathBuf};
54use uuid::Uuid;
55
56use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
57
58/// Generate a rsync-style temporary filename with dot prefix and random suffix
59///
60/// For example: "archive.json.gz" -> ".archive.json.gz.a7bX2q"
61///
62/// The naming convention follows rsync's pattern:
63/// - Prefix with `.` to hide the file on Unix systems
64/// - Append a 6-character random suffix for uniqueness
65pub fn generate_temp_filename<P: AsRef<Path>>(path: P) -> PathBuf {
66    let path = path.as_ref();
67
68    // Generate 6-character random suffix using first 6 hex chars of a uuid
69    let uuid = Uuid::new_v4();
70    let hex = format!("{:x}", uuid.as_u128());
71    let random_suffix = &hex[..6];
72
73    // Get the filename
74    if let Some(filename) = path.file_name() {
75        if let Some(filename_str) = filename.to_str() {
76            // Create new filename: .{original}.{random}
77            let temp_filename = format!(".{}.{}", filename_str, random_suffix);
78
79            // Return path with new filename
80            if let Some(parent) = path.parent() {
81                return parent.join(temp_filename);
82            } else {
83                return PathBuf::from(temp_filename);
84            }
85        }
86    }
87
88    // Fallback: just add prefix and suffix to entire path
89    let mut temp_path = path.to_path_buf();
90    temp_path.set_file_name(format!(".{}.{}", path.display(), random_suffix));
91    temp_path
92}
93
94/// Atomically replace a file using rsync-style temp files
95///
96/// This performs the following sequence:
97/// 1. Write new content to temp_path (caller's responsibility - already done)
98/// 2. Move original_path -> .original_path.{random}.old (backup)
99/// 3. Move temp_path -> original_path (replace)
100/// 4. Delete .original_path.{random}.old (cleanup)
101///
102/// If any step fails, attempts to recover by restoring the backup.
103///
104/// # Arguments
105///
106/// * `original_path` - The file to be replaced
107/// * `temp_path` - The temporary file containing the new content
108///
109/// # Errors
110///
111/// Returns diagnostics if any step of the operation fails. The function
112/// attempts automatic recovery by restoring the backup if the replacement fails.
113pub fn atomic_replace_file<P: AsRef<Path>>(
114    original_path: P,
115    temp_path: P,
116) -> Result<(), Vec<Diagnostic>> {
117    let original = original_path.as_ref();
118    let temp = temp_path.as_ref();
119
120    // Generate backup filename with same random suffix as temp file
121    let backup_path = if let Some(filename) = original.file_name() {
122        if let Some(filename_str) = filename.to_str() {
123            // Extract random suffix from temp filename if it follows our pattern
124            let temp_filename = temp.file_name().and_then(|f| f.to_str()).unwrap_or("");
125            let random_suffix =
126                if temp_filename.starts_with('.') && temp_filename.contains(filename_str) {
127                    // Extract suffix after the original filename
128                    temp_filename.rsplit('.').next().unwrap_or("backup")
129                } else {
130                    "backup"
131                };
132
133            let backup_filename = format!(".{}.{}.old", filename_str, random_suffix);
134            if let Some(parent) = original.parent() {
135                parent.join(backup_filename)
136            } else {
137                PathBuf::from(backup_filename)
138            }
139        } else {
140            original.with_extension("old")
141        }
142    } else {
143        original.with_extension("old")
144    };
145
146    // Step 1: Move original to backup
147    if let Err(e) = std::fs::rename(original, &backup_path) {
148        return Err(vec![Diagnostic::new(
149            DiagnosticLevel::Fatal,
150            DiagnosticCode::PathNotFound,
151            format!("I couldn't create backup of the original archive: {}", e),
152        )
153        .with_advice(
154            "Make sure you have write permission in this directory and sufficient disk space."
155                .to_string(),
156        )]);
157    }
158
159    // Step 2: Move temp to original
160    if let Err(e) = std::fs::rename(temp, original) {
161        // Recovery: Try to restore backup
162        let recovery_error = if std::fs::rename(&backup_path, original).is_ok() {
163            format!(
164                "I couldn't move the new archive into place: {}\nI've restored the original archive from backup.",
165                e
166            )
167        } else {
168            format!(
169                "I couldn't move the new archive into place: {}\nWARNING: I also failed to restore the backup. Your original is at: {}",
170                e,
171                backup_path.display()
172            )
173        };
174
175        return Err(vec![Diagnostic::new(
176            DiagnosticLevel::Fatal,
177            DiagnosticCode::PathNotFound,
178            recovery_error,
179        )
180        .with_advice(
181            "Check filesystem permissions and disk space. If the backup exists, you can manually restore it."
182                .to_string()
183        )]);
184    }
185
186    // Step 3: Delete backup
187    // This is non-critical - if it fails, we just leave the backup around
188    let _ = std::fs::remove_file(&backup_path);
189
190    Ok(())
191}
192
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// The temp name must be exactly `.{original}.{suffix}` with a
    /// six-character hexadecimal suffix.
    #[test]
    fn test_generate_temp_filename() {
        let temp = generate_temp_filename("archive.json.gz");
        let filename = temp.file_name().unwrap().to_str().unwrap();

        // Dot prefix, original name, and the separator before the suffix
        let suffix = filename
            .strip_prefix(".archive.json.gz.")
            .expect("temp name should be `.{original}.` followed by the random suffix");

        // Counting dots cannot detect a missing suffix, so check it directly
        assert_eq!(suffix.len(), 6, "unexpected suffix: {:?}", suffix);
        assert!(
            suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "suffix should be hexadecimal: {:?}",
            suffix
        );
    }

    /// The temp file must be created next to the original, not elsewhere.
    #[test]
    fn test_generate_temp_filename_keeps_parent_dir() {
        let temp = generate_temp_filename("some/dir/archive.json.gz");
        assert_eq!(temp.parent(), Some(Path::new("some/dir")));
    }

    /// A successful replace leaves the new content at the original path and
    /// removes both the temp file and the backup.
    #[test]
    fn test_atomic_replace_file() -> Result<(), Box<dyn std::error::Error>> {
        // Create original file
        let mut original = NamedTempFile::new()?;
        writeln!(original, "original content")?;
        original.flush()?;
        let original_path = original.path().to_path_buf();

        // Create temp file with new content
        let temp_path = generate_temp_filename(&original_path);
        {
            let mut temp_file = File::create(&temp_path)?;
            writeln!(temp_file, "new content")?;
        }

        // Perform atomic replace
        atomic_replace_file(&original_path, &temp_path)
            .map_err(|e| format!("Failed to replace file: {:?}", e))?;

        // Verify new content
        let content = std::fs::read_to_string(&original_path)?;
        assert_eq!(content.trim(), "new content");

        // Verify temp file is gone
        assert!(!temp_path.exists());

        // Verify backup is cleaned up
        let backup_pattern = format!(".{}.", original_path.file_name().unwrap().to_str().unwrap());
        let parent = original_path.parent().unwrap();
        let backups: Vec<_> = std::fs::read_dir(parent)?
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_str()
                    .map(|s| s.contains(&backup_pattern) && s.ends_with(".old"))
                    .unwrap_or(false)
            })
            .collect();
        assert_eq!(backups.len(), 0, "Backup file should be cleaned up");

        Ok(())
    }
}