Skip to main content

kintsugi_core/
snapshot.rs

1//! Snapshots for reversibility ("nothing is unrecoverable").
2//!
//! Before an allowed destructive command runs, Kintsugi copies the paths it is
//! likely to touch into a per-snapshot store directory (`store_root/<id>`);
//! `kintsugi undo` restores them.
5//! Copies use reflink CoW where the filesystem supports it (APFS/btrfs/ReFS) and
6//! fall back to a plain copy everywhere else.
7//!
8//! Scope (stated plainly): this covers **files that existed before** the command
9//! — restoring overwrites and recreating deletions. It does not remove
10//! newly-created files, and it cannot undo network calls, external APIs, or
11//! already-pushed commits.
12
13use std::path::{Path, PathBuf};
14
15use serde::{Deserialize, Serialize};
16use uuid::Uuid;
17
18use crate::shell;
19use crate::types::ProposedCommand;
20
/// One captured path within a snapshot.
///
/// Pairs the place a path lived on disk with the place its copy sits inside
/// the snapshot's store directory, so the copy can be put back later.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Entry {
    /// The original path, as resolved against the command's working directory
    /// at capture time. Restoring writes the stored copy back to this location.
    pub original: PathBuf,
    /// Location of the copy, relative to the snapshot's own store directory
    /// (`store_root/<id>`).
    pub stored: String,
    /// Whether the original was a directory (restored as a whole tree) rather
    /// than a single file.
    pub is_dir: bool,
}
31
/// A snapshot manifest: enough to restore every captured path.
///
/// Serializable, so it can be persisted alongside the store and handed back
/// to `restore` later.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Manifest {
    /// Snapshot id (also the store sub-directory name, i.e. `store_root/<id>`).
    pub id: String,
    /// The raw command line this snapshot guards.
    pub command: String,
    /// Captured paths, one entry per path that existed when the snapshot was taken.
    pub entries: Vec<Entry>,
}
42
43/// Predict the filesystem paths a command is likely to touch.
44///
45/// Conservative and dependency-free, but shell-segment aware: the raw line is
46/// split on `;`, `&&`, `||`, `|` and newlines (outside quotes), each segment is
47/// tokenised, and a leading `cd <dir>` updates the effective cwd for the rest of
48/// the line — so `cd build; rm -rf ../dist` resolves `../dist` against `build`,
49/// not the original cwd. Non-flag arguments and redirect targets become
50/// candidates; bogus ones are harmless (only paths that exist are captured).
51pub fn predict_paths(cmd: &ProposedCommand) -> Vec<PathBuf> {
52    let mut out = Vec::new();
53    let mut cwd = cmd.cwd.clone();
54    for segment in split_segments(&cmd.raw) {
55        let tokens = shell::split(&segment);
56        let mut iter = tokens.iter();
57        let Some(prog) = iter.next() else { continue };
58        // A `cd` moves the effective directory for the rest of the line.
59        if prog == "cd" {
60            if let Some(dir) = iter.next() {
61                cwd = resolve(&cwd, dir);
62            }
63            continue;
64        }
65        for tok in iter.by_ref() {
66            if matches!(tok.as_str(), ">" | ">>" | "2>" | "&>" | "2>>") {
67                continue; // the next token is the target; handled below
68            }
69            if let Some(rest) = tok.strip_prefix('>') {
70                let r = rest.trim_start_matches('>');
71                if !r.is_empty() {
72                    out.push(resolve(&cwd, r));
73                }
74                continue;
75            }
76            if tok.starts_with('-') || tok.contains('=') {
77                continue; // a flag or env assignment, not a path
78            }
79            out.push(resolve(&cwd, tok));
80        }
81    }
82    out.sort();
83    out.dedup();
84    out
85}
86
87/// Whether a snapshot can faithfully cover this command — i.e. whether
88/// `kintsugi undo` is an honest promise for it.
89///
90/// Returns `false` when a target is *unbounded* and can't be snapshotted: a glob
91/// (`* ? [`), a shell expansion (`$`, backticks, `~`), the filesystem root, or a
92/// device node. For those, `kintsugi undo` cannot guarantee a rollback and the
93/// filesystem-watcher backstop is the real net — callers must say so honestly.
94pub fn is_fully_reversible(cmd: &ProposedCommand) -> bool {
95    let mut cwd = cmd.cwd.clone();
96    for segment in split_segments(&cmd.raw) {
97        let tokens = shell::split(&segment);
98        let mut iter = tokens.iter();
99        let Some(prog) = iter.next() else { continue };
100        if prog == "cd" {
101            if let Some(dir) = iter.next() {
102                cwd = resolve(&cwd, dir);
103            }
104            continue;
105        }
106        for tok in iter {
107            if tok.starts_with('-') {
108                continue;
109            }
110            // For `key=value` args (env assignments, but also dd's `of=…`), judge
111            // the value — so `dd of=/dev/sda` is caught while `FOO=bar` isn't.
112            let candidate = tok.split_once('=').map(|(_, v)| v).unwrap_or(tok.as_str());
113            if candidate.is_empty() {
114                continue;
115            }
116            // Shell expansions / globs: real targets are unknown ahead of time.
117            if candidate.contains(['*', '?', '[', '$', '`', '~']) {
118                return false;
119            }
120            let resolved = resolve(&cwd, candidate);
121            // The root, a top-level path, or a device can't be meaningfully copied.
122            if resolved == Path::new("/")
123                || resolved.starts_with("/dev")
124                || resolved.parent() == Some(Path::new("/"))
125            {
126                return false;
127            }
128        }
129    }
130    true
131}
132
/// Split a raw command line into sequential segments on `;`, `&&`, `||`, `|` and
/// newlines, ignoring separators inside single or double quotes.
///
/// Backslash escapes are honoured the way a POSIX shell reads them: outside
/// quotes and inside double quotes, a backslash and the character after it are
/// copied through verbatim and never treated as a quote or separator. That
/// keeps line continuations (`\` + newline), escaped separators (`a\;b`) and
/// escaped quotes (`"a\"b"`) inside one segment. Inside single quotes a
/// backslash is an ordinary character.
///
/// Segments are returned with their original text (quotes, escapes and
/// surrounding whitespace included); segments that are empty or whitespace-only
/// are dropped. A lone `&` is not a separator.
fn split_segments(raw: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut cur = String::new();
    let mut quote: Option<char> = None;
    let mut chars = raw.chars().peekable();
    while let Some(c) = chars.next() {
        // An escape swallows the next character, whatever it is.
        if c == '\\' && quote != Some('\'') {
            cur.push(c);
            if let Some(escaped) = chars.next() {
                cur.push(escaped);
            }
            continue;
        }
        match quote {
            Some(q) => {
                cur.push(c);
                if c == q {
                    quote = None;
                }
            }
            None => match c {
                '\'' | '"' => {
                    quote = Some(c);
                    cur.push(c);
                }
                ';' | '\n' | '|' => {
                    // `||` is one separator, not two.
                    if c == '|' && chars.peek() == Some(&'|') {
                        chars.next();
                    }
                    segments.push(std::mem::take(&mut cur));
                }
                '&' if chars.peek() == Some(&'&') => {
                    chars.next();
                    segments.push(std::mem::take(&mut cur));
                }
                _ => cur.push(c),
            },
        }
    }
    segments.push(cur);
    segments.retain(|s| !s.trim().is_empty());
    segments
}
173
/// Turn a command-line argument into a path anchored at `cwd`.
///
/// Leading and trailing quote characters (`"` or `'`) are stripped first. An
/// absolute argument is returned unchanged; a relative one is joined onto
/// `cwd`. No normalisation is performed, so `..` components are kept as written.
fn resolve(cwd: &Path, arg: &str) -> PathBuf {
    let unquoted = arg.trim_matches(|ch: char| ch == '"' || ch == '\'');
    let candidate = Path::new(unquoted);
    if !candidate.is_absolute() {
        return cwd.join(candidate);
    }
    candidate.to_path_buf()
}
183
184/// Capture a snapshot of the existing predicted paths into `store_root`.
185///
186/// Returns `Ok(None)` when nothing existed to capture (so callers can skip
187/// recording an empty snapshot). The store sub-directory is `store_root/<id>`.
188pub fn capture(store_root: &Path, cmd: &ProposedCommand) -> std::io::Result<Option<Manifest>> {
189    let candidates = predict_paths(cmd);
190    let existing: Vec<PathBuf> = candidates.into_iter().filter(|p| p.exists()).collect();
191    if existing.is_empty() {
192        return Ok(None);
193    }
194
195    let id = Uuid::new_v4().to_string();
196    let dir = store_root.join(&id);
197    std::fs::create_dir_all(&dir)?;
198
199    let mut entries = Vec::new();
200    for (i, path) in existing.iter().enumerate() {
201        let stored = i.to_string();
202        let dest = dir.join(&stored);
203        let is_dir = path.is_dir();
204        if is_dir {
205            copy_tree(path, &dest)?;
206        } else {
207            copy_file(path, &dest)?;
208        }
209        entries.push(Entry {
210            original: path.clone(),
211            stored,
212            is_dir,
213        });
214    }
215
216    Ok(Some(Manifest {
217        id,
218        command: cmd.raw.clone(),
219        entries,
220    }))
221}
222
223/// Restore every captured path back to its original location.
224pub fn restore(store_root: &Path, manifest: &Manifest) -> std::io::Result<()> {
225    let dir = store_root.join(&manifest.id);
226    for entry in &manifest.entries {
227        let src = dir.join(&entry.stored);
228        let dst = &entry.original;
229        // Clear whatever is there now, then restore from the store.
230        if dst.exists() {
231            if dst.is_dir() {
232                std::fs::remove_dir_all(dst)?;
233            } else {
234                std::fs::remove_file(dst)?;
235            }
236        }
237        if let Some(parent) = dst.parent() {
238            std::fs::create_dir_all(parent)?;
239        }
240        if entry.is_dir {
241            copy_tree(&src, dst)?;
242        } else {
243            copy_file(&src, dst)?;
244        }
245    }
246    Ok(())
247}
248
249/// Copy a single file, preferring reflink CoW, falling back to a plain copy.
250fn copy_file(src: &Path, dst: &Path) -> std::io::Result<()> {
251    // `reflink_or_copy` reflinks where supported and copies otherwise.
252    reflink_copy::reflink_or_copy(src, dst).map(|_| ())
253}
254
255/// Recursively copy a directory tree (reflinking each file where possible).
256fn copy_tree(src: &Path, dst: &Path) -> std::io::Result<()> {
257    std::fs::create_dir_all(dst)?;
258    for entry in std::fs::read_dir(src)? {
259        let entry = entry?;
260        let from = entry.path();
261        let to = dst.join(entry.file_name());
262        if entry.file_type()?.is_dir() {
263            copy_tree(&from, &to)?;
264        } else {
265            copy_file(&from, &to)?;
266        }
267    }
268    Ok(())
269}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `ProposedCommand` for `raw`, run from `cwd`, with its argv
    /// produced by the same tokenizer the module uses.
    fn cmd(cwd: &Path, raw: &str) -> ProposedCommand {
        ProposedCommand::new("shim", cwd, shell::split(raw), raw)
    }

    /// Plain arguments, spaced redirect targets and absolute paths are all
    /// predicted; flags are not.
    #[test]
    fn predicts_non_flag_args_and_redirects() {
        let cwd = Path::new("/work");
        let p = predict_paths(&cmd(cwd, "rm -rf build dist"));
        assert!(p.contains(&PathBuf::from("/work/build")));
        assert!(p.contains(&PathBuf::from("/work/dist")));
        // Flags are not paths.
        assert!(!p.iter().any(|x| x.ends_with("-rf")));

        // A redirect target is relative to the cwd like any other argument.
        let r = predict_paths(&cmd(cwd, "echo hi > out.txt"));
        assert!(r.contains(&PathBuf::from("/work/out.txt")));

        // Absolute paths are kept as-is rather than joined onto the cwd.
        let abs = predict_paths(&cmd(cwd, "rm /etc/hosts"));
        assert!(abs.contains(&PathBuf::from("/etc/hosts")));
    }

    /// Every segment of a compound line is inspected, and `cd` changes the
    /// directory later segments resolve against.
    #[test]
    fn predicts_across_segments_and_tracks_cd() {
        let cwd = Path::new("/work");
        // The destructive target is relative to the `cd`, not the original cwd.
        // Note the `..` is kept verbatim: prediction does not normalise paths.
        let p = predict_paths(&cmd(cwd, "cd build && rm -rf ../dist"));
        assert!(
            p.contains(&PathBuf::from("/work/build/../dist")),
            "got {p:?}"
        );
        // A `;`-chained second command's paths are still seen.
        let q = predict_paths(&cmd(cwd, "ls; rm notes.txt"));
        assert!(q.contains(&PathBuf::from("/work/notes.txt")));
        // A pipe `|` also splits segments.
        let r = predict_paths(&cmd(cwd, "cat a.txt | rm b.txt"));
        assert!(r.contains(&PathBuf::from("/work/b.txt")), "got {r:?}");
    }

    /// Append (`>>`), stderr (`2>`) and attached (`>file`) redirects all yield
    /// their target file.
    #[test]
    fn predicts_redirect_variants() {
        let cwd = Path::new("/work");
        for raw in ["echo x >> log.txt", "echo x 2> err.txt", "echo x >out.txt"] {
            let p = predict_paths(&cmd(cwd, raw));
            assert!(
                p.iter().any(|x| x.to_string_lossy().ends_with(".txt")),
                "{raw}: {p:?}"
            );
        }
    }

    /// Ordinary targets are reversible; anything whose extent is unknown or
    /// uncopyable is not.
    #[test]
    fn reversibility_flags_unbounded_targets() {
        let cwd = Path::new("/work");
        // Bounded, ordinary targets → reversible.
        assert!(is_fully_reversible(&cmd(cwd, "rm -rf build")));
        assert!(is_fully_reversible(&cmd(cwd, "cd src && rm a.txt")));
        // Globs, expansions, root, and devices → NOT fully reversible.
        assert!(!is_fully_reversible(&cmd(cwd, "rm -rf *")));
        assert!(!is_fully_reversible(&cmd(cwd, "rm -rf $HOME/x")));
        assert!(!is_fully_reversible(&cmd(cwd, "rm -rf /")));
        // The device is reached through a `key=value` argument.
        assert!(!is_fully_reversible(&cmd(
            cwd,
            "dd if=/dev/zero of=/dev/sda"
        )));
    }

    /// A nested directory tree survives a capture → delete → restore round trip.
    #[test]
    fn captures_and_restores_a_directory_tree() {
        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store");
        let work = tmp.path().join("work");
        std::fs::create_dir_all(work.join("sub/deep")).unwrap();
        std::fs::write(work.join("sub/a.txt"), b"one").unwrap();
        std::fs::write(work.join("sub/deep/b.txt"), b"two").unwrap();

        let manifest = capture(&store, &cmd(&work, "rm -rf sub"))
            .unwrap()
            .expect("a directory to capture");
        assert!(manifest.entries.iter().any(|e| e.is_dir), "captured a dir");

        // Delete the whole tree, then restore it from the snapshot.
        std::fs::remove_dir_all(work.join("sub")).unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(work.join("sub/a.txt")).unwrap(), b"one");
        assert_eq!(std::fs::read(work.join("sub/deep/b.txt")).unwrap(), b"two");
    }

    /// A single file is restored both after being overwritten and after being
    /// deleted, and the same snapshot can be restored more than once.
    #[test]
    fn capture_and_restore_overwrite() {
        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store");
        let work = tmp.path().join("work");
        std::fs::create_dir_all(&work).unwrap();
        let file = work.join("data.txt");
        std::fs::write(&file, b"original").unwrap();

        let manifest = capture(&store, &cmd(&work, "rm data.txt"))
            .unwrap()
            .expect("something to capture");
        assert_eq!(manifest.entries.len(), 1);

        // Simulate a command that overwrote the file in place.
        std::fs::write(&file, b"corrupted").unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(&file).unwrap(), b"original");

        // And restore after a delete.
        std::fs::remove_file(&file).unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(&file).unwrap(), b"original");
    }

    /// When none of the predicted paths exist, no snapshot is produced.
    #[test]
    fn capture_returns_none_when_nothing_exists() {
        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store");
        let work = tmp.path().join("work");
        std::fs::create_dir_all(&work).unwrap();
        // Targets a path that doesn't exist.
        let m = capture(&store, &cmd(&work, "rm ghost.txt")).unwrap();
        assert!(m.is_none());
    }

    /// A flat directory with several files round-trips through capture and
    /// restore (the single-level counterpart of the nested-tree test above).
    #[test]
    fn capture_and_restore_directory_tree() {
        let tmp = tempfile::tempdir().unwrap();
        let store = tmp.path().join("store");
        let work = tmp.path().join("work");
        std::fs::create_dir_all(work.join("src")).unwrap();
        std::fs::write(work.join("src/a.rs"), b"fn a() {}").unwrap();
        std::fs::write(work.join("src/b.rs"), b"fn b() {}").unwrap();

        let manifest = capture(&store, &cmd(&work, "rm -rf src")).unwrap().unwrap();

        std::fs::remove_dir_all(work.join("src")).unwrap();
        restore(&store, &manifest).unwrap();
        assert_eq!(std::fs::read(work.join("src/a.rs")).unwrap(), b"fn a() {}");
        assert_eq!(std::fs::read(work.join("src/b.rs")).unwrap(), b"fn b() {}");
    }
}
413}