Skip to main content

kintsugi_core/
snapshot.rs

1//! Snapshots for reversibility ("nothing is unrecoverable").
2//!
3//! Before an allowed destructive command runs, Kintsugi copies the paths it is
4//! likely to touch into a content-addressed store; `kintsugi undo` restores them.
5//! Copies use reflink CoW where the filesystem supports it (APFS/btrfs/ReFS) and
6//! fall back to a plain copy everywhere else.
7//!
8//! Scope (stated plainly): this covers **files that existed before** the command
9//! — restoring overwrites and recreating deletions. It does not remove
10//! newly-created files, and it cannot undo network calls, external APIs, or
11//! already-pushed commits.
12
13use std::path::{Path, PathBuf};
14
15use serde::{Deserialize, Serialize};
16use uuid::Uuid;
17
18use crate::shell;
19use crate::types::ProposedCommand;
20
21/// One captured path within a snapshot.
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
23pub struct Entry {
24    /// The original absolute path.
25    pub original: PathBuf,
26    /// Relative location inside the snapshot store dir.
27    pub stored: String,
28    /// Whether the original was a directory.
29    pub is_dir: bool,
30}
31
32/// A snapshot manifest: enough to restore every captured path.
33#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
34pub struct Manifest {
35    /// Snapshot id (also the store sub-directory name).
36    pub id: String,
37    /// The raw command this snapshot guards.
38    pub command: String,
39    /// Captured paths.
40    pub entries: Vec<Entry>,
41}
42
43/// Predict the filesystem paths a command is likely to touch.
44///
45/// Conservative and dependency-free, but shell-segment aware: the raw line is
46/// split on `;`, `&&`, `||`, `|` and newlines (outside quotes), each segment is
47/// tokenised, and a leading `cd <dir>` updates the effective cwd for the rest of
48/// the line — so `cd build; rm -rf ../dist` resolves `../dist` against `build`,
49/// not the original cwd. Non-flag arguments and redirect targets become
50/// candidates; bogus ones are harmless (only paths that exist are captured).
51pub fn predict_paths(cmd: &ProposedCommand) -> Vec<PathBuf> {
52    let mut out = Vec::new();
53    let mut cwd = cmd.cwd.clone();
54    for segment in split_segments(&cmd.raw) {
55        let tokens = shell::split(&segment);
56        let mut iter = tokens.iter();
57        let Some(prog) = iter.next() else { continue };
58        // A `cd` moves the effective directory for the rest of the line.
59        if prog == "cd" {
60            if let Some(dir) = iter.next() {
61                cwd = resolve(&cwd, dir);
62            }
63            continue;
64        }
65        for tok in iter.by_ref() {
66            if matches!(tok.as_str(), ">" | ">>" | "2>" | "&>" | "2>>") {
67                continue; // the next token is the target; handled below
68            }
69            if let Some(rest) = tok.strip_prefix('>') {
70                let r = rest.trim_start_matches('>');
71                if !r.is_empty() {
72                    out.push(resolve(&cwd, r));
73                }
74                continue;
75            }
76            if tok.starts_with('-') || tok.contains('=') {
77                continue; // a flag or env assignment, not a path
78            }
79            out.push(resolve(&cwd, tok));
80        }
81    }
82    out.sort();
83    out.dedup();
84    out
85}
86
87/// Whether a snapshot can faithfully cover this command — i.e. whether
88/// `kintsugi undo` is an honest promise for it.
89///
90/// Returns `false` when a target is *unbounded* and can't be snapshotted: a glob
91/// (`* ? [`), a shell expansion (`$`, backticks, `~`), the filesystem root, or a
92/// device node. For those, `kintsugi undo` cannot guarantee a rollback and the
93/// filesystem-watcher backstop is the real net — callers must say so honestly.
94pub fn is_fully_reversible(cmd: &ProposedCommand) -> bool {
95    let mut cwd = cmd.cwd.clone();
96    for segment in split_segments(&cmd.raw) {
97        let tokens = shell::split(&segment);
98        let mut iter = tokens.iter();
99        let Some(prog) = iter.next() else { continue };
100        if prog == "cd" {
101            if let Some(dir) = iter.next() {
102                cwd = resolve(&cwd, dir);
103            }
104            continue;
105        }
106        for tok in iter {
107            if tok.starts_with('-') {
108                continue;
109            }
110            // For `key=value` args (env assignments, but also dd's `of=…`), judge
111            // the value — so `dd of=/dev/sda` is caught while `FOO=bar` isn't.
112            let candidate = tok.split_once('=').map(|(_, v)| v).unwrap_or(tok.as_str());
113            if candidate.is_empty() {
114                continue;
115            }
116            // Shell expansions / globs: real targets are unknown ahead of time.
117            if candidate.contains(['*', '?', '[', '$', '`', '~']) {
118                return false;
119            }
120            let resolved = resolve(&cwd, candidate);
121            // The root, a top-level path, or a device can't be meaningfully copied.
122            if resolved == Path::new("/")
123                || resolved.starts_with("/dev")
124                || resolved.parent() == Some(Path::new("/"))
125            {
126                return false;
127            }
128        }
129    }
130    true
131}
132
/// Split a raw command line into sequential segments on `;`, `&&`, `||`, `|` and
/// newlines.
///
/// Separators are ignored when they are inside single or double quotes, or
/// when they are escaped with a backslash (so the `\;` that terminates
/// `find -exec` does not end the segment). A backslash escapes the next
/// character everywhere except inside single quotes, which means `\"` does not
/// close a double-quoted string and a backslash-newline continues the line.
///
/// Segment text is returned verbatim — quotes, backslashes and surrounding
/// whitespace included — and segments that are empty or whitespace-only are
/// dropped.
fn split_segments(raw: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut cur = String::new();
    let mut quote: Option<char> = None;
    let mut chars = raw.chars().peekable();
    while let Some(c) = chars.next() {
        // Keep an escaped character glued to its backslash and never treat it
        // as a quote or separator. Single quotes disable escaping.
        if c == '\\' && quote != Some('\'') {
            cur.push(c);
            if let Some(escaped) = chars.next() {
                cur.push(escaped);
            }
            continue;
        }
        match quote {
            Some(q) => {
                cur.push(c);
                if c == q {
                    quote = None;
                }
            }
            None => match c {
                '\'' | '"' => {
                    quote = Some(c);
                    cur.push(c);
                }
                ';' | '\n' | '|' => {
                    // `||` is one separator, not two.
                    if c == '|' && chars.peek() == Some(&'|') {
                        chars.next();
                    }
                    segments.push(std::mem::take(&mut cur));
                }
                // A lone `&` is left in place; only `&&` separates.
                '&' if chars.peek() == Some(&'&') => {
                    chars.next();
                    segments.push(std::mem::take(&mut cur));
                }
                _ => cur.push(c),
            },
        }
    }
    segments.push(cur);
    segments.retain(|s| !s.trim().is_empty());
    segments
}
173
/// Turn a command-line argument into a path: surrounding `"`/`'` characters
/// are stripped, absolute paths are returned unchanged, and relative ones are
/// joined onto `cwd`. No normalisation is performed.
fn resolve(cwd: &Path, arg: &str) -> PathBuf {
    let unquoted = arg.trim_matches(|ch: char| ch == '"' || ch == '\'');
    let candidate = Path::new(unquoted);
    if !candidate.is_absolute() {
        return cwd.join(candidate);
    }
    candidate.to_path_buf()
}
183
184/// Capture a snapshot of the existing predicted paths into `store_root`.
185///
186/// Returns `Ok(None)` when nothing existed to capture (so callers can skip
187/// recording an empty snapshot). The store sub-directory is `store_root/<id>`.
188pub fn capture(store_root: &Path, cmd: &ProposedCommand) -> std::io::Result<Option<Manifest>> {
189    let candidates = predict_paths(cmd);
190    let existing: Vec<PathBuf> = candidates.into_iter().filter(|p| p.exists()).collect();
191    if existing.is_empty() {
192        return Ok(None);
193    }
194
195    let id = Uuid::new_v4().to_string();
196    let dir = store_root.join(&id);
197    std::fs::create_dir_all(&dir)?;
198
199    let mut entries = Vec::new();
200    for (i, path) in existing.iter().enumerate() {
201        let stored = i.to_string();
202        let dest = dir.join(&stored);
203        let is_dir = path.is_dir();
204        if is_dir {
205            copy_tree(path, &dest)?;
206        } else {
207            copy_file(path, &dest)?;
208        }
209        entries.push(Entry {
210            original: path.clone(),
211            stored,
212            is_dir,
213        });
214    }
215
216    Ok(Some(Manifest {
217        id,
218        command: cmd.raw.clone(),
219        entries,
220    }))
221}
222
223/// Restore every captured path back to its original location.
224///
225/// Each entry is restored atomically: the stored copy is staged to a temp
226/// sibling, the live path is moved aside, then the staged copy is rename-swapped
227/// into place. If the swap fails, the original is rolled back — an interrupted
228/// undo never leaves a path half-written.
229pub fn restore(store_root: &Path, manifest: &Manifest) -> std::io::Result<()> {
230    let dir = store_root.join(&manifest.id);
231    for (idx, entry) in manifest.entries.iter().enumerate() {
232        let src = dir.join(&entry.stored);
233        let dst = &entry.original;
234        if let Some(parent) = dst.parent() {
235            std::fs::create_dir_all(parent)?;
236        }
237        let tmp = restore_tmp_path(dst, &manifest.id, idx);
238        let _ = remove_path(&tmp);
239        if entry.is_dir {
240            copy_tree(&src, &tmp)?;
241        } else {
242            copy_file(&src, &tmp)?;
243        }
244        if dst.exists() {
245            let bak = restore_tmp_path(dst, &manifest.id, idx).with_extension("bak");
246            let _ = remove_path(&bak);
247            std::fs::rename(dst, &bak)?;
248            if let Err(e) = std::fs::rename(&tmp, dst) {
249                std::fs::rename(&bak, dst).ok();
250                let _ = remove_path(&tmp);
251                return Err(e);
252            }
253            let _ = remove_path(&bak);
254        } else {
255            std::fs::rename(&tmp, dst)?;
256        }
257    }
258    Ok(())
259}
260
/// Build the staging path used while restoring `dst`: a hidden sibling named
/// `.kintsugi-restore-<id>-<idx>-<file name>`. Falls back to `path` when `dst`
/// has no file name and to `.` when it has no parent.
fn restore_tmp_path(dst: &Path, id: &str, idx: usize) -> std::path::PathBuf {
    let leaf = match dst.file_name() {
        Some(os_name) => os_name.to_string_lossy().into_owned(),
        None => String::from("path"),
    };
    let staged = format!(".kintsugi-restore-{id}-{idx}-{leaf}");
    match dst.parent() {
        Some(folder) => folder.join(staged),
        None => Path::new(".").join(staged),
    }
}
269
/// Delete whatever is at `p`: a directory is removed recursively, anything
/// else that exists is removed as a file, and a missing path is a no-op.
fn remove_path(p: &Path) -> std::io::Result<()> {
    if p.is_dir() {
        return std::fs::remove_dir_all(p);
    }
    if !p.exists() {
        return Ok(());
    }
    std::fs::remove_file(p)
}
279
280/// Copy a single file, preferring reflink CoW, falling back to a plain copy.
281fn copy_file(src: &Path, dst: &Path) -> std::io::Result<()> {
282    // `reflink_or_copy` reflinks where supported and copies otherwise.
283    reflink_copy::reflink_or_copy(src, dst).map(|_| ())
284}
285
286/// Recursively copy a directory tree (reflinking each file where possible).
287fn copy_tree(src: &Path, dst: &Path) -> std::io::Result<()> {
288    std::fs::create_dir_all(dst)?;
289    for entry in std::fs::read_dir(src)? {
290        let entry = entry?;
291        let from = entry.path();
292        let to = dst.join(entry.file_name());
293        if entry.file_type()?.is_dir() {
294            copy_tree(&from, &to)?;
295        } else {
296            copy_file(&from, &to)?;
297        }
298    }
299    Ok(())
300}
301
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for `ProposedCommand::new` with the `"shim"` label, the
    /// tokenised form of `raw`, and `raw` itself.
    fn cmd(cwd: &Path, raw: &str) -> ProposedCommand {
        let argv = shell::split(raw);
        ProposedCommand::new("shim", cwd, argv, raw)
    }

    /// A scratch directory plus the `store` and `work` paths inside it. Only
    /// `work` is created. The returned guard must stay alive for as long as
    /// the paths are used.
    fn scratch() -> (tempfile::TempDir, PathBuf, PathBuf) {
        let guard = tempfile::tempdir().unwrap();
        let store = guard.path().join("store");
        let work = guard.path().join("work");
        std::fs::create_dir_all(&work).unwrap();
        (guard, store, work)
    }

    #[test]
    fn predicts_non_flag_args_and_redirects() {
        let cwd = Path::new("/work");

        let p = predict_paths(&cmd(cwd, "rm -rf build dist"));
        for expected in ["/work/build", "/work/dist"] {
            assert!(p.contains(&PathBuf::from(expected)));
        }
        // The flag itself must never show up as a candidate.
        assert!(p.iter().all(|x| !x.ends_with("-rf")));

        let r = predict_paths(&cmd(cwd, "echo hi > out.txt"));
        assert!(r.contains(&PathBuf::from("/work/out.txt")));

        let abs = predict_paths(&cmd(cwd, "rm /etc/hosts"));
        assert!(abs.contains(&PathBuf::from("/etc/hosts")));
    }

    #[test]
    fn predicts_across_segments_and_tracks_cd() {
        let cwd = Path::new("/work");

        // After `cd build`, `../dist` is relative to `build`.
        let p = predict_paths(&cmd(cwd, "cd build && rm -rf ../dist"));
        let wanted = PathBuf::from("/work/build/../dist");
        assert!(p.contains(&wanted), "got {p:?}");

        // Paths of a command after `;` are seen too.
        let q = predict_paths(&cmd(cwd, "ls; rm notes.txt"));
        assert!(q.contains(&PathBuf::from("/work/notes.txt")));

        // So are those of a command after `|`.
        let r = predict_paths(&cmd(cwd, "cat a.txt | rm b.txt"));
        assert!(r.contains(&PathBuf::from("/work/b.txt")), "got {r:?}");
    }

    #[test]
    fn predicts_redirect_variants() {
        let cwd = Path::new("/work");
        let lines = ["echo x >> log.txt", "echo x 2> err.txt", "echo x >out.txt"];
        for raw in lines {
            let p = predict_paths(&cmd(cwd, raw));
            let found_txt = p.iter().any(|x| x.to_string_lossy().ends_with(".txt"));
            assert!(found_txt, "{raw}: {p:?}");
        }
    }

    #[test]
    fn reversibility_flags_unbounded_targets() {
        let cwd = Path::new("/work");
        let verdict = |raw: &str| is_fully_reversible(&cmd(cwd, raw));

        // Ordinary, bounded targets are reversible.
        assert!(verdict("rm -rf build"));
        assert!(verdict("cd src && rm a.txt"));

        // Globs, expansions, the root and devices are not.
        assert!(!verdict("rm -rf *"));
        assert!(!verdict("rm -rf $HOME/x"));
        assert!(!verdict("rm -rf /"));
        assert!(!verdict("dd if=/dev/zero of=/dev/sda"));
    }

    #[test]
    fn captures_and_restores_a_directory_tree() {
        let (_guard, store, work) = scratch();
        let tree = work.join("sub");
        std::fs::create_dir_all(tree.join("deep")).unwrap();
        std::fs::write(tree.join("a.txt"), b"one").unwrap();
        std::fs::write(tree.join("deep/b.txt"), b"two").unwrap();

        let manifest = capture(&store, &cmd(&work, "rm -rf sub"))
            .unwrap()
            .expect("a directory to capture");
        assert!(manifest.entries.iter().any(|e| e.is_dir), "captured a dir");

        // Wipe the tree, then bring it back from the snapshot.
        std::fs::remove_dir_all(&tree).unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(work.join("sub/a.txt")).unwrap(), b"one");
        assert_eq!(std::fs::read(work.join("sub/deep/b.txt")).unwrap(), b"two");
    }

    #[test]
    fn capture_and_restore_overwrite() {
        let (_guard, store, work) = scratch();
        let file = work.join("data.txt");
        std::fs::write(&file, b"original").unwrap();

        let manifest = capture(&store, &cmd(&work, "rm data.txt"))
            .unwrap()
            .expect("something to capture");
        assert_eq!(manifest.entries.len(), 1);

        // First the file is clobbered in place…
        std::fs::write(&file, b"corrupted").unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(&file).unwrap(), b"original");

        // …then it is removed outright.
        std::fs::remove_file(&file).unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(&file).unwrap(), b"original");
    }

    #[test]
    fn capture_returns_none_when_nothing_exists() {
        let (_guard, store, work) = scratch();
        // `ghost.txt` was never created.
        let outcome = capture(&store, &cmd(&work, "rm ghost.txt")).unwrap();
        assert!(outcome.is_none());
    }

    #[test]
    fn capture_and_restore_directory_tree() {
        let (_guard, store, work) = scratch();
        let src_dir = work.join("src");
        std::fs::create_dir_all(&src_dir).unwrap();
        std::fs::write(src_dir.join("a.rs"), b"fn a() {}").unwrap();
        std::fs::write(src_dir.join("b.rs"), b"fn b() {}").unwrap();

        let manifest = capture(&store, &cmd(&work, "rm -rf src")).unwrap().unwrap();

        std::fs::remove_dir_all(&src_dir).unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(work.join("src/a.rs")).unwrap(), b"fn a() {}");
        assert_eq!(std::fs::read(work.join("src/b.rs")).unwrap(), b"fn b() {}");
    }
}