Skip to main content

vaultdb_core/
journal.rs

1//! Crash-recovery journal for [`crate::RenameBuilder::execute`].
2//!
3//! ## Why this exists
4//!
5//! `RenameBuilder` performs two operations: (1) rename the source `.md`
6//! file, and (2) rewrite every `[[wikilink]]` pointing at the old name
7//! across the vault. If the process crashes between (1) and (2), the
8//! vault is left in an inconsistent state — the file has been renamed
9//! but other notes still reference the old name.
10//!
11//! The journal makes this recoverable: before any disk change happens,
12//! we write a journal file describing what we're about to do. On the
13//! next mutation (or via an explicit [`crate::Vault::recover`] call),
14//! pending journals are replayed: each is idempotent, so partial
15//! progress is fine.
16//!
17//! ## Layout
18//!
19//! Journals live at `<vault>/.vaultdb/rename-journal/<timestamp>.json`.
20//! The timestamp prefix gives chronological ordering when multiple
21//! journals exist (e.g. several renames committed close together
22//! before a recovery sweep).
23//!
24//! ## Replay state machine
25//!
//! Four observable states when a journal is loaded:
27//!
28//! | source path | dest path | meaning                         | action                                  |
29//! |-------------|-----------|---------------------------------|-----------------------------------------|
30//! | exists      | missing   | rename never ran                | do the rename, then rewrite backlinks   |
31//! | missing     | exists    | rename ran, rewrites incomplete | rewrite backlinks (idempotent)          |
32//! | missing     | missing   | something deleted both          | log + delete journal (stale)            |
33//! | exists      | exists    | conflict introduced externally  | log + skip rename, rewrite backlinks    |
34//!
35//! Backlink rewrites are themselves idempotent: rewriting a file whose
36//! `[[from]]` references have already been replaced by `[[to]]` is a
37//! no-op (the replace finds no matches and returns identical content).
38
39use std::path::{Path, PathBuf};
40
41use serde::{Deserialize, Serialize};
42
43use crate::error::{Result, VaultdbError};
44use crate::writer;
45
/// Name of the directory, nested inside the vault's metadata directory,
/// that holds rename journal files.
pub(crate) const JOURNAL_SUBDIR: &str = "rename-journal";
48
49/// On-disk record describing one in-progress rename.
50#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
51pub struct RenameJournal {
52    /// Path the source `.md` file lived at before the rename.
53    pub source: PathBuf,
54    /// Path the source file is being moved to.
55    pub dest: PathBuf,
56    /// Name (filename without `.md`) used in `[[wikilinks]]` to identify
57    /// this record before the rename.
58    pub from_name: String,
59    /// Name being introduced.
60    pub to_name: String,
61    /// Files known to contain at least one `[[from_name]]` reference
62    /// when the journal was written. May contain extras that no longer
63    /// match (replay handles that gracefully).
64    pub backlinks: Vec<PathBuf>,
65}
66
67fn journal_dir(vault_root: &Path) -> PathBuf {
68    vault_root.join(crate::lock::META_DIR).join(JOURNAL_SUBDIR)
69}
70
71/// Atomically write `journal` to a new file under `<vault>/.vaultdb/rename-journal/`.
72/// Returns the path of the written journal so the caller can delete it
73/// after the rename + rewrites complete successfully.
74pub(crate) fn write(vault_root: &Path, journal: &RenameJournal) -> Result<PathBuf> {
75    let dir = journal_dir(vault_root);
76    std::fs::create_dir_all(&dir).map_err(VaultdbError::Io)?;
77
78    // Filename = nanoseconds-since-epoch. This gives chronological
79    // ordering and avoids collisions even under tight back-to-back writes.
80    // We deliberately don't use a hash of contents: two functionally
81    // identical renames a millisecond apart should still produce two
82    // distinct journals.
83    let stamp = std::time::SystemTime::now()
84        .duration_since(std::time::SystemTime::UNIX_EPOCH)
85        .map(|d| d.as_nanos())
86        .unwrap_or(0);
87    let path = dir.join(format!("{:032}.json", stamp));
88    let serialized = serde_json::to_string_pretty(journal)
89        .map_err(|e| VaultdbError::Internal(format!("serialize rename journal: {}", e)))?;
90    writer::atomic_write(&path, &serialized)?;
91    Ok(path)
92}
93
/// Remove a journal file, ignoring any failure.
///
/// The function returns nothing and deliberately discards the result of
/// the removal: a journal that survives a failed delete is simply
/// replayed again on a later sweep, and replay is idempotent, so the
/// only cost is redundant work.
pub(crate) fn delete(journal_path: &Path) {
    // Best-effort: the outcome is intentionally dropped.
    std::fs::remove_file(journal_path).ok();
}
101
102/// List every pending journal under `<vault>/.vaultdb/rename-journal/`,
103/// sorted by filename (chronological because of the timestamp prefix).
104pub fn list_pending(vault_root: &Path) -> Result<Vec<PathBuf>> {
105    let dir = journal_dir(vault_root);
106    if !dir.is_dir() {
107        return Ok(Vec::new());
108    }
109    let mut paths: Vec<PathBuf> = Vec::new();
110    for entry in std::fs::read_dir(&dir).map_err(VaultdbError::Io)? {
111        let entry = entry.map_err(VaultdbError::Io)?;
112        let path = entry.path();
113        if path.extension().and_then(|s| s.to_str()) == Some("json") {
114            paths.push(path);
115        }
116    }
117    paths.sort();
118    Ok(paths)
119}
120
121/// Replay one journal: do the rename if it's still pending, then make
122/// every backlink rewrite idempotent. On success the journal file is
123/// deleted; on error the journal stays for the next replay sweep.
124pub fn replay(journal_path: &Path) -> Result<()> {
125    let raw = std::fs::read_to_string(journal_path).map_err(VaultdbError::Io)?;
126    let journal: RenameJournal = serde_json::from_str(&raw).map_err(|e| {
127        VaultdbError::Internal(format!(
128            "parse rename journal {}: {}",
129            journal_path.display(),
130            e
131        ))
132    })?;
133
134    let source_exists = journal.source.is_file();
135    let dest_exists = journal.dest.is_file();
136
137    match (source_exists, dest_exists) {
138        (true, false) => {
139            // State A: rename never ran. Do it now.
140            std::fs::rename(&journal.source, &journal.dest).map_err(VaultdbError::Io)?;
141        }
142        (false, true) => {
143            // State B: rename ran, backlink rewrites may be incomplete.
144            // Continue to the rewrite loop below.
145        }
146        (false, false) => {
147            // State C: stale journal — both files gone. Nothing to do.
148            // Drop the journal and return.
149            delete(journal_path);
150            return Ok(());
151        }
152        (true, true) => {
153            // State D: source still exists AND dest already exists. The
154            // rename can't proceed (target conflict), but if dest is the
155            // post-rename file from a prior partial run, backlinks may
156            // still need rewriting. Skip the rename, run the rewrite
157            // loop. This is rare (would require an external user
158            // creating a file at the dest path mid-rename); we err on
159            // the side of finishing the partial rename's tail rather
160            // than aborting.
161        }
162    }
163
164    let mut last_err: Option<VaultdbError> = None;
165    for backlink in &journal.backlinks {
166        if !backlink.is_file() {
167            continue; // file was deleted externally; skip
168        }
169        let content = match std::fs::read_to_string(backlink) {
170            Ok(c) => c,
171            Err(e) => {
172                last_err = Some(VaultdbError::Io(e));
173                continue;
174            }
175        };
176        let new_content =
177            crate::mutation::rewrite_wikilinks(&content, &journal.from_name, &journal.to_name);
178        if new_content == content {
179            // Already rewritten (or never matched); idempotent no-op.
180            continue;
181        }
182        if let Err(e) = writer::atomic_write(backlink, &new_content) {
183            last_err = Some(VaultdbError::Io(e));
184        }
185    }
186
187    // Only delete the journal if every rewrite succeeded. If anything
188    // failed, leave the journal so the next sweep retries those files.
189    if let Some(err) = last_err {
190        return Err(err);
191    }
192    delete(journal_path);
193    Ok(())
194}
195
196/// Replay every pending journal. Returns the number of journals
197/// processed (whether successfully replayed and deleted, or processed
198/// as stale and deleted).
199pub fn replay_all(vault_root: &Path) -> Result<usize> {
200    let pending = list_pending(vault_root)?;
201    let mut count = 0;
202    for path in pending {
203        replay(&path)?;
204        count += 1;
205    }
206    Ok(count)
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a throwaway vault directory containing `.obsidian/` and `notes/`.
    fn make_vault() -> TempDir {
        let dir = TempDir::new().unwrap();
        fs::create_dir(dir.path().join(".obsidian")).unwrap();
        fs::create_dir(dir.path().join("notes")).unwrap();
        dir
    }

    /// Write `content` to `path`, panicking on failure.
    fn write_md(path: &Path, content: &str) {
        fs::write(path, content).unwrap();
    }

    #[test]
    fn write_and_list_journal() {
        let dir = make_vault();
        let journal = RenameJournal {
            source: dir.path().join("notes/old.md"),
            dest: dir.path().join("notes/new.md"),
            from_name: "old".into(),
            to_name: "new".into(),
            backlinks: vec![dir.path().join("notes/other.md")],
        };
        let path = write(dir.path(), &journal).unwrap();
        assert!(path.is_file());

        let pending = list_pending(dir.path()).unwrap();
        assert_eq!(pending.len(), 1);
        assert_eq!(pending[0], path);
    }

    #[test]
    fn replay_state_a_renames_then_rewrites() {
        // State A: source exists, dest missing. Replay does the rename
        // and the rewrite.
        let dir = make_vault();
        let source = dir.path().join("notes/Stanford.md");
        let dest = dir.path().join("notes/Stanford University.md");
        let other = dir.path().join("notes/Application.md");

        write_md(&source, "---\nstatus: active\n---\nMain note.\n");
        write_md(
            &other,
            "---\nrelated:\n  - \"[[Stanford]]\"\n---\nApplied to [[Stanford]] last week.\n",
        );

        let journal = RenameJournal {
            source: source.clone(),
            dest: dest.clone(),
            from_name: "Stanford".into(),
            to_name: "Stanford University".into(),
            backlinks: vec![other.clone()],
        };
        let journal_path = write(dir.path(), &journal).unwrap();

        replay(&journal_path).unwrap();

        // Rename done.
        assert!(!source.exists());
        assert!(dest.is_file());

        // Backlinks rewritten.
        let other_content = fs::read_to_string(&other).unwrap();
        assert!(other_content.contains("[[Stanford University]]"));
        assert!(!other_content.contains("[[Stanford]]"));

        // Journal cleaned up.
        assert!(!journal_path.exists());
    }

    #[test]
    fn replay_state_b_finishes_partial_backlink_rewrites() {
        // State B: rename completed, some backlinks rewritten, one still
        // pointing at the old name.
        let dir = make_vault();
        let source = dir.path().join("notes/Stanford.md");
        let dest = dir.path().join("notes/Stanford University.md");
        let already = dir.path().join("notes/AlreadyRewritten.md");
        let pending = dir.path().join("notes/StillOldName.md");

        // Rename has already happened (source missing, dest present).
        write_md(&dest, "---\n---\nMain note.\n");
        write_md(&already, "Sees [[Stanford University]] only.\n");
        write_md(&pending, "Sees [[Stanford]] and needs rewriting.\n");

        let journal = RenameJournal {
            source,
            dest: dest.clone(),
            from_name: "Stanford".into(),
            to_name: "Stanford University".into(),
            backlinks: vec![already.clone(), pending.clone()],
        };
        let journal_path = write(dir.path(), &journal).unwrap();

        replay(&journal_path).unwrap();

        // The already-rewritten file is unchanged.
        let a = fs::read_to_string(&already).unwrap();
        assert_eq!(a, "Sees [[Stanford University]] only.\n");

        // The pending file is rewritten. The old link must be gone
        // regardless of what follows it, so match the bare link text.
        let p = fs::read_to_string(&pending).unwrap();
        assert!(p.contains("[[Stanford University]]"));
        assert!(!p.contains("[[Stanford]]"));

        // Journal cleaned up.
        assert!(!journal_path.exists());
    }

    #[test]
    fn replay_state_c_stale_journal_is_cleaned() {
        // State C: both source and dest gone (e.g. user manually deleted
        // both files). Replay should delete the journal and not error.
        let dir = make_vault();
        let journal = RenameJournal {
            source: dir.path().join("notes/Gone.md"),
            dest: dir.path().join("notes/AlsoGone.md"),
            from_name: "Gone".into(),
            to_name: "AlsoGone".into(),
            backlinks: vec![],
        };
        let journal_path = write(dir.path(), &journal).unwrap();

        replay(&journal_path).unwrap();

        // Journal cleaned up; nothing else changed.
        assert!(!journal_path.exists());
    }

    #[test]
    fn replay_state_d_skips_rename_but_rewrites_backlinks() {
        // State D: source and dest both exist. Replay must leave both
        // files untouched, still rewrite backlinks, and drop the journal.
        let dir = make_vault();
        let source = dir.path().join("notes/Stanford.md");
        let dest = dir.path().join("notes/Stanford University.md");
        let other = dir.path().join("notes/Application.md");

        write_md(&source, "Original body.\n");
        write_md(&dest, "Conflicting body.\n");
        write_md(&other, "Applied to [[Stanford]] last week.\n");

        let journal = RenameJournal {
            source: source.clone(),
            dest: dest.clone(),
            from_name: "Stanford".into(),
            to_name: "Stanford University".into(),
            backlinks: vec![other.clone()],
        };
        let journal_path = write(dir.path(), &journal).unwrap();

        replay(&journal_path).unwrap();

        // Neither file was moved or overwritten.
        assert_eq!(fs::read_to_string(&source).unwrap(), "Original body.\n");
        assert_eq!(fs::read_to_string(&dest).unwrap(), "Conflicting body.\n");

        // Backlinks rewritten.
        let other_content = fs::read_to_string(&other).unwrap();
        assert!(other_content.contains("[[Stanford University]]"));
        assert!(!other_content.contains("[[Stanford]]"));

        // Journal cleaned up.
        assert!(!journal_path.exists());
    }

    #[test]
    fn replay_is_idempotent_when_called_twice() {
        // Calling replay_all twice in a row must not break anything.
        // After the first call, the journals are gone, so the second
        // is a no-op.
        let dir = make_vault();
        let source = dir.path().join("notes/X.md");
        let dest = dir.path().join("notes/Y.md");
        write_md(&source, "Body.\n");

        let journal = RenameJournal {
            source: source.clone(),
            dest: dest.clone(),
            from_name: "X".into(),
            to_name: "Y".into(),
            backlinks: vec![],
        };
        write(dir.path(), &journal).unwrap();

        let n1 = replay_all(dir.path()).unwrap();
        let n2 = replay_all(dir.path()).unwrap();
        assert_eq!(n1, 1);
        assert_eq!(n2, 0);
        assert!(dest.is_file());
    }

    #[test]
    fn replay_all_processes_multiple_journals_in_order() {
        let dir = make_vault();
        let a = dir.path().join("notes/A.md");
        let b = dir.path().join("notes/B.md");
        let c = dir.path().join("notes/C.md");
        write_md(&a, "Body.\n");

        // Journal 1: A -> B
        write(
            dir.path(),
            &RenameJournal {
                source: a.clone(),
                dest: b.clone(),
                from_name: "A".into(),
                to_name: "B".into(),
                backlinks: vec![],
            },
        )
        .unwrap();
        // Tiny pause to ensure distinct timestamps for journal 2.
        std::thread::sleep(std::time::Duration::from_millis(2));
        // Journal 2: B -> C (uses output of journal 1).
        write(
            dir.path(),
            &RenameJournal {
                source: b.clone(),
                dest: c.clone(),
                from_name: "B".into(),
                to_name: "C".into(),
                backlinks: vec![],
            },
        )
        .unwrap();

        let n = replay_all(dir.path()).unwrap();
        assert_eq!(n, 2);
        assert!(!a.exists());
        assert!(!b.exists());
        assert!(c.is_file(), "expected final state to be C");
    }
}