Skip to main content

dbmd_core/
fsx.rs

1//! `fsx` — the one atomic, durable file write for db.md's primary data.
2//!
3//! Every store-state file that holds **primary** data — content records
4//! ([`crate::parser::write_file`]), `log.md` and its archives ([`crate::log`]),
5//! and in-place link rewrites — is replaced through [`write_atomic`]:
6//!
7//! 1. write the bytes to a uniquely-named sibling temp file in the *same*
8//!    directory (`create_new`, so a predictable temp name can never be
9//!    clobbered — closing the temp-clobber race);
10//! 2. `fsync` the temp file;
11//! 3. `rename` it over the destination (atomic on a single filesystem, so a
12//!    concurrent reader never observes a half-written file);
13//! 4. `fsync` the parent directory so the rename survives a crash.
14//!
15//! This is the single primitive for durable writes — never `std::fs::write`,
16//! which is neither atomic nor crash-durable.
17//!
18//! **Not for the index.** `index.md` / `index.jsonl` are *derived, rebuildable*
19//! artifacts on the O(changed) write-through path; they use their own
20//! atomic-but-not-`fsync`'d writer ([`crate::index`]'s `AtomicTemp`) on purpose
21//! — a crash-lost index write is recovered by `dbmd index rebuild`, so paying an
22//! `fsync` per catalog update on the hot loop would be cost without benefit.
23
24use std::fs::{self, File, OpenOptions};
25use std::io::Write;
26use std::path::{Path, PathBuf};
27use std::sync::atomic::{AtomicU64, Ordering};
28use std::time::{SystemTime, UNIX_EPOCH};
29
30/// Atomically and durably replace `path` with `bytes` (see the module docs for
31/// the write/fsync/rename/fsync sequence). The parent directory is created if
32/// missing. On *any* early return between temp-file creation and a successful
33/// rename — a `write_all`/`sync_all` failure (ENOSPC, EIO, quota) as well as a
34/// rename failure — the temp file is cleaned up rather than leaked, via the
35/// [`TempGuard`] `Drop` impl (mirroring `index.rs`'s `AtomicTemp`).
36pub fn write_atomic(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
37    let dir = path.parent().unwrap_or_else(|| Path::new("."));
38    fs::create_dir_all(dir)?;
39
40    let file_name = path
41        .file_name()
42        .and_then(|s| s.to_str())
43        .unwrap_or("dbmd-tmp");
44    let (mut f, mut guard) = create_temp_file(dir, file_name)?;
45
46    // Scope the handle so it is flushed/closed before the rename. A failure here
47    // returns via `?`; `guard` then drops and removes the orphaned temp file.
48    {
49        f.write_all(bytes)?;
50        f.sync_all()?;
51    }
52
53    // The rename either errors (guard drops, cleaning up the temp) or succeeds
54    // (we disarm the guard so it does not remove the now-renamed destination).
55    fs::rename(&guard.path, path)?;
56    guard.disarm();
57    sync_parent_dir(dir);
58    Ok(())
59}
60
/// Cleanup handle for the hidden temp file that `write_atomic` creates.
///
/// Dropping the guard while it is still armed deletes `path`. After
/// [`TempGuard::disarm`] has been called, dropping it leaves `path` alone, so
/// a file that has already been renamed into place is never touched.
struct TempGuard {
    /// Location of the temp file this guard is responsible for.
    path: PathBuf,
    /// `true` while dropping the guard should still delete `path`.
    armed: bool,
}

impl TempGuard {
    /// Switch off the drop-time deletion of `path`. Meant to be called once
    /// the temp file has been renamed into place and is no longer a stray.
    fn disarm(&mut self) {
        self.armed = false;
    }
}

impl Drop for TempGuard {
    fn drop(&mut self) {
        // A disarmed guard has nothing left to clean up.
        if !self.armed {
            return;
        }
        // Best-effort: a failed removal is ignored because `drop` cannot
        // report errors to the caller.
        let _ = fs::remove_file(&self.path);
    }
}
85
86/// Create a uniquely-named temp file in `dir` with `create_new` (never clobbers
87/// a predictable name), retrying on the vanishingly-rare collision. The name is
88/// hidden (`.`-prefixed) and tagged with pid + nanos + a process-wide counter so
89/// concurrent writers in the same directory never pick the same path. Returns the
90/// open handle plus an armed [`TempGuard`] so any early return cleans up the temp.
91fn create_temp_file(dir: &Path, file_name: &str) -> std::io::Result<(File, TempGuard)> {
92    static TMP_SEQ: AtomicU64 = AtomicU64::new(0);
93    let pid = std::process::id();
94    let nanos = SystemTime::now()
95        .duration_since(UNIX_EPOCH)
96        .map(|d| d.as_nanos())
97        .unwrap_or(0);
98
99    for _ in 0..128 {
100        let seq = TMP_SEQ.fetch_add(1, Ordering::Relaxed);
101        let tmp = dir.join(format!(".{file_name}.tmp.{pid}.{nanos}.{seq}"));
102        match OpenOptions::new().write(true).create_new(true).open(&tmp) {
103            Ok(file) => {
104                return Ok((
105                    file,
106                    TempGuard {
107                        path: tmp,
108                        armed: true,
109                    },
110                ))
111            }
112            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => continue,
113            Err(e) => return Err(e),
114        }
115    }
116
117    Err(std::io::Error::new(
118        std::io::ErrorKind::AlreadyExists,
119        "could not allocate a unique dbmd temp file",
120    ))
121}
122
/// Try to `fsync` the directory `dir` so that a `rename` performed inside it
/// is persisted across a crash.
///
/// Strictly best-effort: if the directory cannot be opened, or the sync itself
/// fails (some filesystems disallow directory `fsync`), the error is discarded
/// and nothing is reported to the caller.
fn sync_parent_dir(dir: &Path) {
    // Open-then-sync as one fallible chain; either failure is ignored.
    let _ = File::open(dir).and_then(|handle| handle.sync_all());
}
130
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Entries directly inside `dir` whose file name contains the `.tmp.`
    /// marker that `create_temp_file` puts into every temp name.
    fn stray_temp_files(dir: &std::path::Path) -> Vec<std::fs::DirEntry> {
        std::fs::read_dir(dir)
            .unwrap()
            .filter_map(Result::ok)
            .filter(|entry| entry.file_name().to_string_lossy().contains(".tmp."))
            .collect()
    }

    /// First write creates the file (and its missing parent); a second write
    /// swaps the content, and neither leaves a temp file behind.
    #[test]
    fn write_atomic_creates_then_replaces_durably() {
        let scratch = TempDir::new().unwrap();
        let parent = scratch.path().join("sub"); // does not exist yet
        let target = parent.join("file.txt");

        write_atomic(&target, b"first").unwrap();
        assert_eq!(std::fs::read(&target).unwrap(), b"first");

        // Second write goes over the existing file.
        write_atomic(&target, b"second").unwrap();
        assert_eq!(std::fs::read(&target).unwrap(), b"second");

        let leftovers = stray_temp_files(target.parent().unwrap());
        assert!(leftovers.is_empty(), "no temp files may be left behind");
    }

    /// An empty payload must produce an empty file, not an error.
    #[test]
    fn write_atomic_is_byte_exact_including_empty() {
        let scratch = TempDir::new().unwrap();
        let target = scratch.path().join("empty.txt");

        write_atomic(&target, b"").unwrap();

        assert_eq!(std::fs::read(&target).unwrap(), b"");
    }

    /// Regression for finding #22: bailing out between temp-file creation and
    /// a successful rename (e.g. `write_all`/`sync_all` failing under
    /// ENOSPC/EIO) must NOT leave the hidden temp file in the data directory.
    ///
    /// Before the fix, `create_temp_file` returned a bare `PathBuf` with no
    /// `Drop` cleanup, so abandoning it without a rename — which is what `?`
    /// does on a write/sync failure — left the temp on disk. The test rebuilds
    /// that situation by dropping the guard without renaming, then checks the
    /// temp is gone.
    #[test]
    fn regression_armed_guard_removes_temp_on_early_drop() {
        let scratch = TempDir::new().unwrap();
        let (handle, guard) = create_temp_file(scratch.path(), "file.txt").unwrap();
        let temp_location = guard.path.clone();
        assert!(
            temp_location.exists(),
            "temp file should exist after create_temp_file"
        );

        // Stand-in for a write/sync failure: both the handle and the
        // still-armed guard are dropped with no rename in between.
        drop(handle);
        drop(guard);

        assert!(
            !temp_location.exists(),
            "armed guard must remove the orphaned temp file on early drop"
        );
        // The directory must hold no `.tmp.` stragglers either.
        let leftovers = stray_temp_files(scratch.path());
        assert!(leftovers.is_empty(), "no temp files may be left behind");
    }

    /// A disarmed guard (the state after a successful rename) must NOT delete
    /// the path it tracks — doing so would clobber the renamed destination.
    #[test]
    fn regression_disarmed_guard_leaves_file_intact() {
        let scratch = TempDir::new().unwrap();
        let (handle, mut guard) = create_temp_file(scratch.path(), "kept.txt").unwrap();
        drop(handle);
        let tracked = guard.path.clone();

        guard.disarm();
        drop(guard);

        assert!(
            tracked.exists(),
            "disarmed guard must leave the renamed destination untouched"
        );
    }
}