squib_snapshot/atomic.rs
1//! Atomic temp-file + fsync + rename pattern (D25).
2//!
3//! Implements the rule from
4//! [16-snapshots.md § 2](../../../specs/16-snapshots.md#2-state-file): every snapshot
5//! file is staged into `<dest>.tmp` next to the destination, fsynced, then atomically
6//! renamed. A half-disk-full host or a SIGTERM mid-save never corrupts the previous
7//! snapshot pair.
8//!
9//! Two safety properties:
10//! - Pre-flight cross-filesystem check: if the temp directory and destination live on different
11//! filesystems, `rename(2)` cannot be atomic. We reject before opening any file. (Risk row in 91
12//! § 12 — "User points `<id>.snap.tmp` at a different filesystem.")
13//! - Best-effort cleanup: on any failure between `open(O_CREAT)` and the final `rename`, the temp
14//! file is unlinked.
15
16use std::{
17 fs::{File, OpenOptions},
18 io::{self, Write},
19 path::{Path, PathBuf},
20};
21
22use crate::error::SnapshotError;
23
/// Suffix appended to the destination path to derive the temp-file name.
///
/// [`derive_temp_path`] appends this verbatim to the whole destination path, so
/// `x.snap` is staged as `x.snap.tmp` in the same directory.
pub const TEMP_SUFFIX: &str = ".tmp";
26
27/// Validate that the temp-file path's directory and the destination share a
28/// filesystem. Required for `rename(2)` atomicity.
29///
30/// On Unix we compare `stat::st_dev`. On non-Unix targets (Windows, WASI) the check
31/// is best-effort: we currently treat them as same-fs (the pager + writer code
32/// targets macOS — the cross-platform stub is for compile-time portability of the
33/// snapshot crate itself).
34///
35/// # Errors
36/// [`SnapshotError::AtomicCommitCrossFs`] if the two paths' parent directories live
37/// on different filesystems; [`SnapshotError::Io`] if either path cannot be `stat`-ed.
38pub fn check_same_filesystem(dest: &Path, temp: &Path) -> Result<(), SnapshotError> {
39 #[cfg(unix)]
40 {
41 use std::os::unix::fs::MetadataExt;
42 // The destination may not exist yet (e.g. first save). Probe its parent
43 // directory's filesystem; that's where `rename(2)` will land.
44 let dest_dir = dest.parent().unwrap_or_else(|| Path::new("."));
45 let temp_dir = temp.parent().unwrap_or_else(|| Path::new("."));
46 let dest_dev = std::fs::metadata(dest_dir)
47 .map_err(SnapshotError::Io)?
48 .dev();
49 let temp_dev = std::fs::metadata(temp_dir)
50 .map_err(SnapshotError::Io)?
51 .dev();
52 if dest_dev != temp_dev {
53 return Err(SnapshotError::AtomicCommitCrossFs {
54 dest: dest.to_path_buf(),
55 temp_dir: temp_dir.to_path_buf(),
56 });
57 }
58 Ok(())
59 }
60 #[cfg(not(unix))]
61 {
62 let _ = (dest, temp);
63 Ok(())
64 }
65}
66
67/// Derive the temp-file name from a destination path: `dest + TEMP_SUFFIX`.
68///
69/// The temp file lives next to the destination, in the same directory, so
70/// `rename(2)` is atomic by construction (same filesystem). This *is* the contract
71/// from D25 — moving the temp file to e.g. `/tmp` would defeat the atomicity.
72#[must_use]
73pub fn derive_temp_path(dest: &Path) -> PathBuf {
74 let mut s = dest.as_os_str().to_owned();
75 s.push(TEMP_SUFFIX);
76 PathBuf::from(s)
77}
78
/// Scope guard that removes a file when it is dropped, unless
/// [`disarm`](Self::disarm) was called first.
///
/// It exists so that every early exit between creating the temp file and the
/// final rename leaves no stray file behind.
#[derive(Debug)]
pub struct UnlinkOnDrop {
    /// `Some(path)` while armed; `None` once disarmed (or after the drop ran).
    path: Option<PathBuf>,
}

impl UnlinkOnDrop {
    /// Create an armed guard that will unlink `path` on drop.
    #[must_use]
    pub fn new(path: PathBuf) -> Self {
        let armed = Some(path);
        Self { path: armed }
    }

    /// Cancel the cleanup: after this call dropping the guard touches nothing.
    pub fn disarm(&mut self) {
        drop(self.path.take());
    }

    /// Path that would be unlinked on drop, or `None` if the guard is disarmed.
    #[must_use]
    pub fn path(&self) -> Option<&Path> {
        self.path.as_ref().map(PathBuf::as_path)
    }
}

impl Drop for UnlinkOnDrop {
    fn drop(&mut self) {
        let doomed = match self.path.take() {
            Some(armed) => armed,
            None => return,
        };
        // Failure to unlink is intentionally ignored. At worst the temp file is
        // leaked; the destination is never touched by this guard, which is the
        // property D25 promises.
        let _ = std::fs::remove_file(doomed);
    }
}
117
/// Atomic-write fixture: open a temp file, hand the caller a writer, then either
/// fsync + rename it onto `dest` or unlink it on error.
///
/// Intended call sequence:
/// 1. [`Self::open`] creates (or truncates) the temp file next to the destination.
/// 2. Caller writes the payload, either through the [`Write`] impl (e.g.
///    [`Write::write_all`]) or directly via [`Self::file_mut`].
/// 3. Caller invokes [`Self::commit`], which does `flush` + `sync_all`, closes the
///    handle, then `rename(2)`s the temp file onto the destination.
/// 4. If step 3 fails, or the writer is dropped without `commit`, the
///    [`UnlinkOnDrop`] guard removes the temp file.
///
/// The cross-FS pre-flight check runs in [`Self::open`]; on Unix a subsequent
/// `commit` is therefore not expected to fail with `EXDEV`. (On non-Unix targets
/// the pre-flight check is a stub that always succeeds.)
#[derive(Debug)]
pub struct AtomicWriter {
    // Open handle to the temp file; every payload byte goes through it.
    // Declared before `guard`, so on drop the handle is closed before the
    // guard unlinks the temp file (fields drop in declaration order).
    file: File,
    // Where the payload is staged: `dest_path` + `TEMP_SUFFIX`.
    temp_path: PathBuf,
    // Final location that `commit` renames the temp file onto.
    dest_path: PathBuf,
    // Unlinks `temp_path` on drop; disarmed only after a successful rename.
    guard: UnlinkOnDrop,
}
136
137impl AtomicWriter {
138 /// Open the temp file for writing.
139 ///
140 /// Pre-flight: validate that the temp dir and destination dir live on the same
141 /// filesystem (cross-FS rename cannot be atomic).
142 ///
143 /// # Errors
144 /// [`SnapshotError::AtomicCommitCrossFs`] for the cross-FS case;
145 /// [`SnapshotError::Io`] for an open / mkdir / stat failure.
146 pub fn open(dest: &Path) -> Result<Self, SnapshotError> {
147 let temp = derive_temp_path(dest);
148 check_same_filesystem(dest, &temp)?;
149 let file = OpenOptions::new()
150 .write(true)
151 .create(true)
152 .truncate(true)
153 .open(&temp)?;
154 let guard = UnlinkOnDrop::new(temp.clone());
155 Ok(Self {
156 file,
157 temp_path: temp,
158 dest_path: dest.to_path_buf(),
159 guard,
160 })
161 }
162
163 /// The temp-file path (visible during writing, before commit).
164 #[must_use]
165 pub fn temp_path(&self) -> &Path {
166 &self.temp_path
167 }
168
169 /// The destination path the commit will produce.
170 #[must_use]
171 pub fn dest_path(&self) -> &Path {
172 &self.dest_path
173 }
174
175 /// Borrow the underlying file for direct writes (used by the streaming envelope
176 /// + memory file paths).
177 pub fn file_mut(&mut self) -> &mut File {
178 &mut self.file
179 }
180
181 /// fsync the temp file, drop the writer, and `rename(2)` it onto the destination.
182 ///
183 /// # Errors
184 /// [`SnapshotError::AtomicCommitFailed`] for the rename failure (temp file is
185 /// unlinked); [`SnapshotError::Io`] for an fsync failure (temp file is also
186 /// unlinked).
187 pub fn commit(mut self) -> Result<(), SnapshotError> {
188 self.file.flush()?;
189 self.file.sync_all()?;
190 // Drop the file handle before rename: macOS / Linux both allow rename
191 // over an open file, but closing first removes any chance of a stale
192 // descriptor confusing the test fixture.
193 drop(self.file);
194 match std::fs::rename(&self.temp_path, &self.dest_path) {
195 Ok(()) => {
196 // Disarm the guard so we don't unlink a destination we just
197 // committed.
198 self.guard.disarm();
199 Ok(())
200 }
201 Err(e) => Err(SnapshotError::AtomicCommitFailed(e)),
202 }
203 }
204}
205
206impl Write for AtomicWriter {
207 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
208 self.file.write(buf)
209 }
210
211 fn flush(&mut self) -> io::Result<()> {
212 self.file.flush()
213 }
214}
215
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::*;

    /// Destination path named `name` inside `dir`.
    fn dest_in(dir: &Path, name: &str) -> PathBuf {
        dir.join(name)
    }

    #[test]
    fn test_should_derive_temp_path_with_tmp_suffix() {
        let p = derive_temp_path(Path::new("/tmp/x.snap"));
        assert_eq!(p, PathBuf::from("/tmp/x.snap.tmp"));
    }

    #[test]
    fn test_should_commit_temp_file_onto_destination() {
        let dir = TempDir::new().unwrap();
        let dest = dest_in(dir.path(), "x.snap");
        let mut w = AtomicWriter::open(&dest).unwrap();
        w.write_all(b"hello world").unwrap();
        w.commit().unwrap();

        let s = std::fs::read_to_string(&dest).unwrap();
        assert_eq!(s, "hello world");
        // No stranded temp.
        assert!(!derive_temp_path(&dest).exists());
    }

    #[test]
    fn test_should_unlink_temp_when_writer_dropped_without_commit() {
        let dir = TempDir::new().unwrap();
        let dest = dest_in(dir.path(), "x.snap");
        {
            let mut w = AtomicWriter::open(&dest).unwrap();
            w.write_all(b"partial").unwrap();
            // No commit() — guard fires on drop.
        }
        assert!(!dest.exists(), "no commit happened");
        assert!(!derive_temp_path(&dest).exists(), "temp file leaked");
    }

    #[test]
    fn test_should_leave_existing_destination_alone_when_writer_dropped() {
        let dir = TempDir::new().unwrap();
        let dest = dest_in(dir.path(), "x.snap");
        std::fs::write(&dest, b"prior good").unwrap();
        {
            let mut w = AtomicWriter::open(&dest).unwrap();
            w.write_all(b"new bad").unwrap();
        }
        let s = std::fs::read_to_string(&dest).unwrap();
        assert_eq!(s, "prior good", "previous good pair clobbered");
    }

    #[test]
    fn test_should_handle_back_to_back_atomic_writes() {
        let dir = TempDir::new().unwrap();
        let dest = dest_in(dir.path(), "x.snap");
        for i in 0..3 {
            let mut w = AtomicWriter::open(&dest).unwrap();
            let payload = format!("snapshot {i}");
            w.write_all(payload.as_bytes()).unwrap();
            w.commit().unwrap();
            assert_eq!(std::fs::read_to_string(&dest).unwrap(), payload);
        }
    }

    #[test]
    fn test_should_surface_io_error_when_parent_directory_missing() {
        // A second mount cannot be created reliably from a unit test, so the
        // `AtomicCommitCrossFs` branch itself is not exercised here. What we can
        // pin down is the neighbouring failure mode: when the destination's
        // parent directory does not exist, `open` must fail, and it must report
        // the failure as `Io` rather than misclassifying it as a cross-FS
        // rejection (or silently succeeding).
        let dir = TempDir::new().unwrap();
        let nonexistent = dir.path().join("does-not-exist").join("inner.snap");
        // Parent of `nonexistent` is `dir.path()/does-not-exist`, which doesn't
        // exist, so the stat (or, off Unix, the open) fails with NotFound.
        let res = AtomicWriter::open(&nonexistent);
        assert!(
            matches!(res, Err(SnapshotError::Io(_))),
            "missing parent directory must surface as SnapshotError::Io"
        );
    }

    #[test]
    fn test_should_disarm_guard_to_avoid_post_commit_unlink() {
        let dir = TempDir::new().unwrap();
        let dest = dest_in(dir.path(), "x.snap");
        let mut g = UnlinkOnDrop::new(dest.clone());
        std::fs::write(&dest, b"persistent").unwrap();
        g.disarm();
        drop(g);
        assert!(dest.exists());
    }
}
313}