json_archive/
atomic_file.rs

1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025  Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22//! Problem: how do you append data to a compressed archive without losing data?
23//!
24//! Gzip and similar formats don't support in-place append. To add one record
25//! to a 20GB archive, you decompress it all, add the record, and recompress.
26//!
27//! You have two options:
28//!
29//! Option A: Overwrite in place. Seek to byte 0 of the existing file and start
30//! writing the new compressed stream. No extra disk space needed. But if you
31//! fail mid-write (out of space, crash, power loss), you've corrupted the
32//! original and lost everything. With a 20GB file, that's a lot of time spent
33//! in the danger zone.
34//!
35//! Option B: Write to a new file, then swap. Requires 2x disk space temporarily,
36//! but the original stays intact until the new file is complete. If writing
37//! fails, you just delete the partial temp file.
38//!
39//! This module implements option B. I'm not comfortable with option A.
40//!
41//! The swap sequence:
42//! 1. Write new archive to `.archive.json.gz.a7bX2q`
43//! 2. Rename original to `.archive.json.gz.a7bX2q.old` (backup)
44//! 3. Rename temp to `archive.json.gz` (atomic on same filesystem)
45//! 4. Delete backup
46//!
47//! If writing fails, original is untouched. If the swap fails, we restore
48//! from backup. Data loss requires a kernel crash between steps 2 and 3.
49//!
50//! Assumes everything is on one filesystem. Cross-filesystem renames aren't
51//! atomic and we don't handle them.
52
53use std::path::{Path, PathBuf};
54use uuid::Uuid;
55
56use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
57
58/// Generate a rsync-style temporary filename with dot prefix and random suffix
59///
60/// For example: "archive.json.gz" -> ".archive.json.gz.a7bX2q"
61///
62/// The naming convention follows rsync's pattern:
63/// - Prefix with `.` to hide the file on Unix systems
64/// - Append a 6-character random suffix for uniqueness
65pub fn generate_temp_filename<P: AsRef<Path>>(path: P) -> PathBuf {
66    let path = path.as_ref();
67
68    // Generate 6-character random suffix using first 6 hex chars of a uuid
69    let uuid = Uuid::new_v4();
70    let hex = format!("{:x}", uuid.as_u128());
71    let random_suffix = &hex[..6];
72
73    // Get the filename
74    if let Some(filename) = path.file_name() {
75        if let Some(filename_str) = filename.to_str() {
76            // Create new filename: .{original}.{random}
77            let temp_filename = format!(".{}.{}", filename_str, random_suffix);
78
79            // Return path with new filename
80            if let Some(parent) = path.parent() {
81                return parent.join(temp_filename);
82            } else {
83                return PathBuf::from(temp_filename);
84            }
85        }
86    }
87
88    // Fallback: just add prefix and suffix to entire path
89    let mut temp_path = path.to_path_buf();
90    temp_path.set_file_name(format!(".{}.{}", path.display(), random_suffix));
91    temp_path
92}
93
94/// Atomically replace a file using rsync-style temp files
95///
96/// This performs the following sequence:
97/// 1. Write new content to temp_path (caller's responsibility - already done)
98/// 2. Move original_path -> .original_path.{random}.old (backup)
99/// 3. Move temp_path -> original_path (replace)
100/// 4. Delete .original_path.{random}.old (cleanup)
101///
102/// If any step fails, attempts to recover by restoring the backup.
103///
104/// # Arguments
105///
106/// * `original_path` - The file to be replaced
107/// * `temp_path` - The temporary file containing the new content
108///
109/// # Errors
110///
111/// Returns diagnostics if any step of the operation fails. The function
112/// attempts automatic recovery by restoring the backup if the replacement fails.
113pub fn atomic_replace_file<P: AsRef<Path>>(original_path: P, temp_path: P) -> Result<(), Vec<Diagnostic>> {
114    let original = original_path.as_ref();
115    let temp = temp_path.as_ref();
116
117    // Generate backup filename with same random suffix as temp file
118    let backup_path = if let Some(filename) = original.file_name() {
119        if let Some(filename_str) = filename.to_str() {
120            // Extract random suffix from temp filename if it follows our pattern
121            let temp_filename = temp.file_name().and_then(|f| f.to_str()).unwrap_or("");
122            let random_suffix = if temp_filename.starts_with('.') && temp_filename.contains(filename_str) {
123                // Extract suffix after the original filename
124                temp_filename.rsplit('.').next().unwrap_or("backup")
125            } else {
126                "backup"
127            };
128
129            let backup_filename = format!(".{}.{}.old", filename_str, random_suffix);
130            if let Some(parent) = original.parent() {
131                parent.join(backup_filename)
132            } else {
133                PathBuf::from(backup_filename)
134            }
135        } else {
136            original.with_extension("old")
137        }
138    } else {
139        original.with_extension("old")
140    };
141
142    // Step 1: Move original to backup
143    if let Err(e) = std::fs::rename(original, &backup_path) {
144        return Err(vec![Diagnostic::new(
145            DiagnosticLevel::Fatal,
146            DiagnosticCode::PathNotFound,
147            format!("I couldn't create backup of the original archive: {}", e),
148        )
149        .with_advice(
150            "Make sure you have write permission in this directory and sufficient disk space."
151                .to_string()
152        )]);
153    }
154
155    // Step 2: Move temp to original
156    if let Err(e) = std::fs::rename(temp, original) {
157        // Recovery: Try to restore backup
158        let recovery_error = if std::fs::rename(&backup_path, original).is_ok() {
159            format!(
160                "I couldn't move the new archive into place: {}\nI've restored the original archive from backup.",
161                e
162            )
163        } else {
164            format!(
165                "I couldn't move the new archive into place: {}\nWARNING: I also failed to restore the backup. Your original is at: {}",
166                e,
167                backup_path.display()
168            )
169        };
170
171        return Err(vec![Diagnostic::new(
172            DiagnosticLevel::Fatal,
173            DiagnosticCode::PathNotFound,
174            recovery_error,
175        )
176        .with_advice(
177            "Check filesystem permissions and disk space. If the backup exists, you can manually restore it."
178                .to_string()
179        )]);
180    }
181
182    // Step 3: Delete backup
183    // This is non-critical - if it fails, we just leave the backup around
184    let _ = std::fs::remove_file(&backup_path);
185
186    Ok(())
187}
188
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// The temp name must be exactly `.{original}.{6 hex chars}`.
    #[test]
    fn test_generate_temp_filename() {
        let temp = generate_temp_filename("archive.json.gz");
        let filename = temp.file_name().unwrap().to_str().unwrap();

        // Dot prefix + original filename + dot separator. Counting dots is not
        // enough here: ".archive.json.gz" already contains three of them.
        let prefix = ".archive.json.gz.";
        assert!(
            filename.starts_with(prefix),
            "expected `{}` to start with `{}`",
            filename,
            prefix
        );

        // Everything after the prefix is the random suffix.
        let suffix = &filename[prefix.len()..];
        assert_eq!(suffix.len(), 6, "random suffix should be 6 characters");
        assert!(
            suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "random suffix should be hex, got `{}`",
            suffix
        );
    }

    /// The temp file must be placed next to the original so the final rename
    /// stays inside one directory.
    #[test]
    fn test_generate_temp_filename_keeps_parent_directory() {
        let temp = generate_temp_filename(Path::new("some/dir/archive.json.gz"));
        assert_eq!(temp.parent(), Some(Path::new("some/dir")));

        let filename = temp.file_name().unwrap().to_str().unwrap();
        assert!(filename.starts_with(".archive.json.gz."));
    }

    /// End-to-end swap: new content lands at the original path, and neither
    /// the temp file nor a backup is left behind.
    #[test]
    fn test_atomic_replace_file() -> Result<(), Box<dyn std::error::Error>> {
        // Create original file
        let mut original = NamedTempFile::new()?;
        writeln!(original, "original content")?;
        original.flush()?;
        let original_path = original.path().to_path_buf();

        // Create temp file with new content
        let temp_path = generate_temp_filename(&original_path);
        {
            let mut temp_file = File::create(&temp_path)?;
            writeln!(temp_file, "new content")?;
        }

        // Perform atomic replace
        atomic_replace_file(&original_path, &temp_path)
            .map_err(|e| format!("Failed to replace file: {:?}", e))?;

        // Verify new content
        let content = std::fs::read_to_string(&original_path)?;
        assert_eq!(content.trim(), "new content");

        // Verify temp file is gone
        assert!(!temp_path.exists());

        // Verify backup is cleaned up: nothing named `.{original}.*.old`
        // may remain in the directory.
        let backup_pattern = format!(".{}.", original_path.file_name().unwrap().to_str().unwrap());
        let parent = original_path.parent().unwrap();
        let backups: Vec<_> = std::fs::read_dir(parent)?
            .filter_map(|e| e.ok())
            .filter(|e| {
                e.file_name()
                    .to_str()
                    .map(|s| s.contains(&backup_pattern) && s.ends_with(".old"))
                    .unwrap_or(false)
            })
            .collect();
        assert_eq!(backups.len(), 0, "Backup file should be cleaned up");

        Ok(())
    }
}