Skip to main content

zagens_runtime_adapters/snapshot/
repo.rs

1//! Side-git repository wrapper for workspace snapshots.
2//!
3//! `SnapshotRepo` shells out to the system `git` binary (we deliberately
4//! avoid `git2` to dodge its LGPL surface). The two paths that matter:
5//!
6//! - `git_dir`  → `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/.git`
7//! - `work_tree` → the user's actual workspace
8//!
9//! Every git invocation passes both `--git-dir` AND `--work-tree`. That is
10//! the single biggest safety mechanism: it guarantees we never accidentally
11//! mutate the user's own `.git` directory. If git can't find the side
12//! repo, the command fails fast instead of falling back to "current
13//! directory".
14
15use std::collections::{HashMap, HashSet};
16use std::io;
17use std::path::{Component, Path, PathBuf};
18use std::process::{Command, Output};
19use std::sync::{Arc, LazyLock, Mutex};
20use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
21
22use super::paths::{ensure_snapshot_dir, snapshot_git_dir};
23use super::size::{DEFAULT_SNAPSHOT_MAX_WORKSPACE_GB, workspace_exceeds_size_limit};
24
/// Handle naming one snapshot. At present it simply wraps the SHA of the
/// commit that represents the snapshot in the side repo.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotId(pub String);

impl SnapshotId {
    /// View the wrapped SHA as a `&str` without copying it.
    pub fn as_str(&self) -> &str {
        let Self(sha) = self;
        sha
    }
}
35
/// A single snapshot record (one row in `git log`).
///
/// [`SnapshotRepo::list`] builds these by splitting each log line into its
/// SHA, timestamp and subject columns.
#[derive(Debug, Clone)]
pub struct Snapshot {
    /// Commit SHA inside the side repo.
    pub id: SnapshotId,
    /// Subject line — the label passed to [`SnapshotRepo::snapshot`].
    pub label: String,
    /// Author timestamp (Unix seconds). [`SnapshotRepo::list`] stores `0`
    /// here when the timestamp column cannot be parsed.
    pub timestamp: i64,
}
46
/// Wrapper around the per-workspace side-git repo.
///
/// Both paths are handed to every git invocation as `--git-dir` and
/// `--work-tree`, so commands always operate on the side repo.
pub struct SnapshotRepo {
    /// The side repo's `.git` directory, as computed by `snapshot_git_dir`.
    git_dir: PathBuf,
    /// The workspace being snapshotted (canonicalized when that succeeds,
    /// otherwise the path exactly as the caller supplied it).
    work_tree: PathBuf,
}
52
/// Ignore patterns that `write_builtin_excludes` writes verbatim into the
/// side repo's `info/exclude` file each time the repo is opened.
///
/// The list covers dependency/build/cache directories plus binary, archive
/// and media artifacts, so snapshots stay focused on source files.
const BUILTIN_EXCLUDES: &str = "\
# DeepSeek TUI built-in snapshot exclusions
node_modules/
target/
dist/
build/
.build/
.next/
.nuxt/
.svelte-kit/
.turbo/
.parcel-cache/
vendor/
.cargo/
.rustup/
.npm/
.bun/
.yarn/
.pnpm-store/
.cache/
.venv/
venv/
.tox/
__pycache__/
*.pyc
.mypy_cache/
.pytest_cache/
.ruff_cache/
.gradle/
.m2/
.local/
.DS_Store

# Binary and generated artifacts. Snapshots are source rollback checkpoints,
# not a full binary backup; keeping these out avoids side-repo bloat.
*.exe
*.dll
*.so
*.dylib
*.wasm
*.o
*.obj
*.class
*.pdb
*.dSYM
*.zip
*.tar
*.tar.gz
*.tgz
*.tar.bz2
*.tar.xz
*.7z
*.rar
*.iso
*.dmg
*.bin
*.mp4
*.mov
*.mkv
*.avi
*.webm
*.mp3
*.wav
*.flac
*.aac
";
119
120impl SnapshotRepo {
121    /// Open or initialize the snapshot repo for `workspace`.
122    ///
123    /// Uses [`DEFAULT_SNAPSHOT_MAX_WORKSPACE_GB`]. Prefer
124    /// [`Self::open_or_init_with_max_gb`] when config supplies a limit.
125    pub fn open_or_init(workspace: &Path) -> io::Result<Self> {
126        Self::open_or_init_with_max_gb(workspace, DEFAULT_SNAPSHOT_MAX_WORKSPACE_GB)
127    }
128
129    /// Open or initialize the snapshot repo, skipping side-git init when the
130    /// workspace on-disk size exceeds `max_workspace_gb`.
131    pub fn open_or_init_with_max_gb(workspace: &Path, max_workspace_gb: f64) -> io::Result<Self> {
132        let work_tree = workspace
133            .canonicalize()
134            .unwrap_or_else(|_| workspace.to_path_buf());
135        if let Some(reason) =
136            unsafe_workspace_snapshot_reason(&work_tree, dirs::home_dir().as_deref())
137        {
138            return Err(io::Error::new(
139                io::ErrorKind::InvalidInput,
140                format!(
141                    "workspace snapshots are disabled for {reason}: {}",
142                    work_tree.display()
143                ),
144            ));
145        }
146
147        if workspace_exceeds_size_limit(&work_tree, max_workspace_gb)? {
148            return Err(io::Error::new(
149                io::ErrorKind::InvalidInput,
150                format!(
151                    "workspace snapshots skipped: tree exceeds max_workspace_gb ({max_workspace_gb} GB): {}",
152                    work_tree.display()
153                ),
154            ));
155        }
156
157        let _ = ensure_snapshot_dir(&work_tree)?;
158        let git_dir = snapshot_git_dir(&work_tree);
159        let snapshot_dir = git_dir.parent().ok_or_else(|| {
160            io::Error::new(io::ErrorKind::InvalidInput, "snapshot dir has no parent")
161        })?;
162
163        init_snapshot_repo_if_needed(snapshot_dir, &git_dir, &work_tree)?;
164
165        write_builtin_excludes(&git_dir)?;
166        Ok(Self { git_dir, work_tree })
167    }
168
169    /// Take a snapshot of the current working tree.
170    ///
171    /// Internally: `git add -A`, `git write-tree`, `git commit-tree`, then
172    /// `git update-ref HEAD <commit>`.
173    /// `git add -A` honours the user's workspace ignore rules while staging
174    /// into the side repo's index.
175    ///
176    /// Returns the snapshot's commit SHA.
177    pub fn snapshot(&self, label: &str) -> io::Result<SnapshotId> {
178        // Stage every tracked + untracked path the workspace exposes.
179        // `--all` here means `add` + `update` + `remove` — the same set
180        // `git status` would show.
181        let add = run_git(&self.git_dir, &self.work_tree, &["add", "-A"])?;
182        if !add.status.success() {
183            return Err(io_other(format!(
184                "git add -A failed: {}",
185                String::from_utf8_lossy(&add.stderr).trim()
186            )));
187        }
188
189        let tree = run_git(&self.git_dir, &self.work_tree, &["write-tree"])?;
190        if !tree.status.success() {
191            return Err(io_other(format!(
192                "git write-tree failed: {}",
193                String::from_utf8_lossy(&tree.stderr).trim()
194            )));
195        }
196        let tree = String::from_utf8_lossy(&tree.stdout).trim().to_string();
197
198        let parent = run_git(
199            &self.git_dir,
200            &self.work_tree,
201            &["rev-parse", "--verify", "HEAD"],
202        )?;
203        let parent = parent
204            .status
205            .success()
206            .then(|| String::from_utf8_lossy(&parent.stdout).trim().to_string())
207            .filter(|s| !s.is_empty());
208
209        let mut args = vec!["commit-tree".to_string(), tree];
210        if let Some(parent) = parent {
211            args.push("-p".to_string());
212            args.push(parent);
213        }
214        args.push("-m".to_string());
215        args.push(label.to_string());
216        let arg_refs: Vec<&str> = args.iter().map(String::as_str).collect();
217
218        // `commit-tree` creates marker commits even when the tree matches its
219        // parent, and it does not run user/global commit hooks.
220        let commit = run_git(&self.git_dir, &self.work_tree, &arg_refs)?;
221        if !commit.status.success() {
222            return Err(io_other(format!(
223                "git commit-tree failed: {}",
224                String::from_utf8_lossy(&commit.stderr).trim()
225            )));
226        }
227        let sha = String::from_utf8_lossy(&commit.stdout).trim().to_string();
228
229        let update = run_git(
230            &self.git_dir,
231            &self.work_tree,
232            &["update-ref", "HEAD", &sha],
233        )?;
234        if !update.status.success() {
235            return Err(io_other(format!(
236                "git update-ref HEAD failed: {}",
237                String::from_utf8_lossy(&update.stderr).trim()
238            )));
239        }
240
241        Ok(SnapshotId(sha))
242    }
243
244    /// Restore the workspace to the state at `id`.
245    ///
246    /// Uses `git checkout <sha> -- :/` which checks out every path in the
247    /// snapshot tree relative to the workspace root. We do NOT touch the
248    /// user's own `.git` — snapshots only contain working-tree files.
249    pub fn restore(&self, id: &SnapshotId) -> io::Result<()> {
250        let current_paths = self.tree_paths("HEAD")?;
251        let target_paths = self.tree_paths(id.as_str())?;
252        let checkout = run_git(
253            &self.git_dir,
254            &self.work_tree,
255            &["checkout", id.as_str(), "--", ":/"],
256        )?;
257        if !checkout.status.success() {
258            return Err(io_other(format!(
259                "git checkout failed: {}",
260                String::from_utf8_lossy(&checkout.stderr).trim()
261            )));
262        }
263        self.remove_paths_missing_from_target(&current_paths, &target_paths)?;
264        Ok(())
265    }
266
267    fn tree_paths(&self, treeish: &str) -> io::Result<HashSet<PathBuf>> {
268        let ls = run_git(
269            &self.git_dir,
270            &self.work_tree,
271            &["ls-tree", "-r", "-z", "--name-only", treeish],
272        )?;
273        if !ls.status.success() {
274            return Err(io_other(format!(
275                "git ls-tree failed: {}",
276                String::from_utf8_lossy(&ls.stderr).trim()
277            )));
278        }
279        Ok(parse_nul_paths(&ls.stdout))
280    }
281
282    fn remove_paths_missing_from_target(
283        &self,
284        current_paths: &HashSet<PathBuf>,
285        target_paths: &HashSet<PathBuf>,
286    ) -> io::Result<()> {
287        for rel in current_paths.difference(target_paths) {
288            if !is_safe_relative_path(rel) {
289                continue;
290            }
291            let path = self.work_tree.join(rel);
292            let Ok(metadata) = std::fs::symlink_metadata(&path) else {
293                continue;
294            };
295            if metadata.file_type().is_dir() {
296                let _ = std::fs::remove_dir(&path);
297            } else {
298                std::fs::remove_file(&path)?;
299            }
300            self.prune_empty_parent_dirs(path.parent());
301        }
302        Ok(())
303    }
304
305    fn prune_empty_parent_dirs(&self, mut dir: Option<&Path>) {
306        while let Some(path) = dir {
307            if path == self.work_tree {
308                break;
309            }
310            if std::fs::remove_dir(path).is_err() {
311                break;
312            }
313            dir = path.parent();
314        }
315    }
316
317    /// List up to `limit` most-recent snapshots, newest first.
318    pub fn list(&self, limit: usize) -> io::Result<Vec<Snapshot>> {
319        // `git log -<n>` is the short form of `--max-count=<n>`; if `limit`
320        // is `usize::MAX` (caller asked for "everything") we pass an empty
321        // count so git defaults to no upper bound.
322        let mut args: Vec<String> = vec!["log".to_string()];
323        if limit < usize::MAX {
324            args.push(format!("--max-count={limit}"));
325        }
326        args.push("--pretty=format:%H%x09%at%x09%s".to_string());
327        args.push("--no-color".to_string());
328        let arg_refs: Vec<&str> = args.iter().map(String::as_str).collect();
329        let log = run_git(&self.git_dir, &self.work_tree, &arg_refs)?;
330        if !log.status.success() {
331            // No commits yet → empty list.
332            return Ok(Vec::new());
333        }
334        let stdout = String::from_utf8_lossy(&log.stdout);
335        let mut out = Vec::new();
336        for line in stdout.lines() {
337            let mut parts = line.splitn(3, '\t');
338            let sha = parts.next().unwrap_or("").to_string();
339            let ts = parts
340                .next()
341                .and_then(|s| s.parse::<i64>().ok())
342                .unwrap_or(0);
343            let subject = parts.next().unwrap_or("").to_string();
344            if sha.is_empty() {
345                continue;
346            }
347            out.push(Snapshot {
348                id: SnapshotId(sha),
349                label: subject,
350                timestamp: ts,
351            });
352        }
353        Ok(out)
354    }
355
356    /// Drop snapshots older than `max_age`, returning the count removed.
357    ///
358    /// Strategy: identify keepable commits (younger than the cutoff),
359    /// reset HEAD to the oldest survivor, then `git reflog expire` +
360    /// `git gc --prune=now` to actually reclaim space. Cheap and avoids
361    /// rewriting history when nothing has aged out.
362    pub fn prune_older_than(&self, max_age: Duration) -> io::Result<usize> {
363        let now = SystemTime::now()
364            .duration_since(UNIX_EPOCH)
365            .map_err(|e| io_other(format!("clock error: {e}")))?
366            .as_secs() as i64;
367        let cutoff = now - max_age.as_secs() as i64;
368
369        let snapshots = self.list(usize::MAX)?;
370        if snapshots.is_empty() {
371            return Ok(0);
372        }
373
374        // Snapshots are newest-first. Find the index of the first one
375        // at-or-older than the cutoff — every entry from that index
376        // onward is a candidate for removal. We use `<=` so a 0-second
377        // retention drops same-second commits (otherwise tests calling
378        // `prune_older_than(Duration::ZERO)` immediately after creating
379        // a snapshot would never prune anything).
380        let cut_index = snapshots.iter().position(|s| s.timestamp <= cutoff);
381        let Some(cut) = cut_index else {
382            return Ok(0);
383        };
384        let removed = snapshots.len() - cut;
385        if removed == 0 {
386            return Ok(0);
387        }
388
389        if cut == 0 {
390            // Every snapshot is older than the cutoff — wipe the repo
391            // entirely so the next snapshot starts a fresh history.
392            // Removing `.git/refs/heads/*` is enough to orphan the old
393            // commits, then gc reclaims them.
394            let refs_dir = self.git_dir.join("refs").join("heads");
395            if refs_dir.exists() {
396                for entry in std::fs::read_dir(&refs_dir)? {
397                    let path = entry?.path();
398                    if path.is_file() {
399                        let _ = std::fs::remove_file(&path);
400                    }
401                }
402            }
403            // Also drop HEAD's packed refs so `git log` returns nothing.
404            let packed = self.git_dir.join("packed-refs");
405            if packed.exists() {
406                let _ = std::fs::remove_file(&packed);
407            }
408        } else {
409            // Reset HEAD to the youngest commit older-than-cutoff's
410            // *predecessor* — i.e. the oldest surviving snapshot.
411            let survivor = &snapshots[cut - 1];
412            let reset = run_git(
413                &self.git_dir,
414                &self.work_tree,
415                &["update-ref", "HEAD", survivor.id.as_str()],
416            )?;
417            if !reset.status.success() {
418                return Err(io_other(format!(
419                    "git update-ref failed: {}",
420                    String::from_utf8_lossy(&reset.stderr).trim()
421                )));
422            }
423        }
424
425        // Reclaim space.
426        let _ = run_git(
427            &self.git_dir,
428            &self.work_tree,
429            &["reflog", "expire", "--expire=now", "--all"],
430        );
431        let _ = run_git(
432            &self.git_dir,
433            &self.work_tree,
434            &["gc", "--prune=now", "--quiet"],
435        );
436
437        Ok(removed)
438    }
439
440    /// Return the side-repo's `.git` directory (for diagnostics).
441    #[allow(dead_code)]
442    pub fn git_dir(&self) -> &Path {
443        &self.git_dir
444    }
445
446    /// Return the work tree path (for diagnostics).
447    #[allow(dead_code)]
448    pub fn work_tree(&self) -> &Path {
449        &self.work_tree
450    }
451}
452
453fn write_builtin_excludes(git_dir: &Path) -> io::Result<()> {
454    let info_dir = git_dir.join("info");
455    std::fs::create_dir_all(&info_dir)?;
456    std::fs::write(info_dir.join("exclude"), BUILTIN_EXCLUDES)
457}
458
/// Process-wide registry of init mutexes, keyed by snapshot directory.
///
/// `snapshot_init_lock` looks up (or lazily inserts) the entry for a
/// directory and hands out a clone of its `Arc`, so all callers initializing
/// the same directory within this process share one mutex.
static SNAPSHOT_INIT_MUTEXES: LazyLock<Mutex<HashMap<PathBuf, Arc<Mutex<()>>>>> =
    LazyLock::new(|| Mutex::new(HashMap::new()));
461
/// Longest time `acquire_snapshot_init_file_lock` keeps retrying before it
/// gives up with a timeout error.
const SNAPSHOT_INIT_FILE_LOCK_MAX_WAIT: Duration = Duration::from_secs(30);
/// Age at which an existing init lock file is treated as abandoned and
/// deleted so a new initializer can take over.
const SNAPSHOT_INIT_FILE_LOCK_STALE: Duration = Duration::from_secs(120);
/// Sleep between attempts to create the init lock file.
const SNAPSHOT_INIT_FILE_LOCK_POLL: Duration = Duration::from_millis(50);
465
466fn snapshot_init_lock(snapshot_dir: &Path) -> Arc<Mutex<()>> {
467    let key = snapshot_dir.to_path_buf();
468    let mut map = SNAPSHOT_INIT_MUTEXES
469        .lock()
470        .expect("snapshot init mutex map");
471    map.entry(key)
472        .or_insert_with(|| Arc::new(Mutex::new(())))
473        .clone()
474}
475
476fn snapshot_repo_initialized(git_dir: &Path, work_tree: &Path) -> bool {
477    git_dir.is_dir()
478        && run_git(git_dir, work_tree, &["rev-parse", "--git-dir"])
479            .map(|output| output.status.success())
480            .unwrap_or(false)
481}
482
/// Best-effort removal of the well-known git lock files directly inside
/// `git_dir`. Anything else — other files, directories, missing entries — is
/// left alone, and removal errors are ignored.
fn remove_stale_git_locks(git_dir: &Path) {
    const LOCK_FILES: [&str; 4] = ["config.lock", "index.lock", "HEAD.lock", "packed-refs.lock"];

    LOCK_FILES
        .iter()
        .map(|name| git_dir.join(name))
        .filter(|candidate| candidate.is_file())
        .for_each(|candidate| {
            let _ = std::fs::remove_file(&candidate);
        });
}
491
/// Clean up bare-repo metadata that an interrupted init may have left
/// directly inside `snapshot_dir`.
///
/// Does nothing when `git_dir` already exists. Otherwise removes the known
/// stray files and directories on a best-effort basis, ignoring errors.
fn remove_legacy_bare_repo_artifacts(snapshot_dir: &Path, git_dir: &Path) {
    const STRAY_FILES: [&str; 6] = [
        "config",
        "config.lock",
        "HEAD",
        "description",
        "index",
        "index.lock",
    ];
    const STRAY_DIRS: [&str; 4] = ["objects", "refs", "info", "hooks"];

    if git_dir.exists() {
        return;
    }

    for stray in STRAY_FILES.iter().map(|name| snapshot_dir.join(name)) {
        if stray.is_file() {
            let _ = std::fs::remove_file(&stray);
        }
    }
    for stray in STRAY_DIRS.iter().map(|name| snapshot_dir.join(name)) {
        if stray.is_dir() {
            let _ = std::fs::remove_dir_all(&stray);
        }
    }
}
517
518fn remove_broken_git_dir(git_dir: &Path, work_tree: &Path) -> io::Result<()> {
519    if !git_dir.exists() {
520        return Ok(());
521    }
522    if snapshot_repo_initialized(git_dir, work_tree) {
523        return Ok(());
524    }
525    remove_stale_git_locks(git_dir);
526    if snapshot_repo_initialized(git_dir, work_tree) {
527        return Ok(());
528    }
529    std::fs::remove_dir_all(git_dir)
530}
531
532fn try_git_init(snapshot_dir: &Path) -> io::Result<Output> {
533    Command::new("git")
534        .arg("init")
535        .arg("--quiet")
536        .arg(snapshot_dir)
537        .output()
538        .map_err(|e| io_other(format!("failed to spawn git init: {e}")))
539}
540
/// Extract a human-readable message from a git invocation's output: trimmed
/// stderr when it has any content, otherwise trimmed stdout.
fn git_init_error(output: &Output) -> String {
    let stderr = String::from_utf8_lossy(&output.stderr);
    match stderr.trim() {
        "" => String::from_utf8_lossy(&output.stdout).trim().to_owned(),
        message => message.to_owned(),
    }
}
550
/// Decide whether a `git init` error message points at lock contention,
/// i.e. whether it mentions a config-file lock failure or "File exists".
fn is_git_config_lock_error(message: &str) -> bool {
    const MARKERS: [&str; 2] = ["could not lock config file", "File exists"];
    MARKERS.iter().any(|marker| message.contains(*marker))
}
554
555fn apply_snapshot_repo_config(git_dir: &Path, work_tree: &Path) {
556    let _ = run_git(
557        git_dir,
558        work_tree,
559        &["config", "user.name", "deepseek-snapshots"],
560    );
561    let _ = run_git(
562        git_dir,
563        work_tree,
564        &["config", "user.email", "snapshots@deepseek-tui.local"],
565    );
566    let _ = run_git(git_dir, work_tree, &["config", "gc.auto", "0"]);
567    let _ = run_git(git_dir, work_tree, &["config", "core.autocrlf", "false"]);
568}
569
570fn acquire_snapshot_init_file_lock(snapshot_dir: &Path) -> io::Result<PathBuf> {
571    std::fs::create_dir_all(snapshot_dir)?;
572    let lock_path = snapshot_dir.join(".snapshot-init.lock");
573    let started = Instant::now();
574    loop {
575        match std::fs::OpenOptions::new()
576            .write(true)
577            .create_new(true)
578            .open(&lock_path)
579        {
580            Ok(_) => return Ok(lock_path),
581            Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
582                if file_older_than(&lock_path, SNAPSHOT_INIT_FILE_LOCK_STALE) {
583                    let _ = std::fs::remove_file(&lock_path);
584                    continue;
585                }
586                if started.elapsed() >= SNAPSHOT_INIT_FILE_LOCK_MAX_WAIT {
587                    return Err(io_other(
588                        "snapshot repo init timed out waiting for another initializer",
589                    ));
590                }
591                std::thread::sleep(SNAPSHOT_INIT_FILE_LOCK_POLL);
592            }
593            Err(e) => return Err(e),
594        }
595    }
596}
597
/// Report whether the file at `path` was last modified at least `max_age`
/// ago.
///
/// Returns `true` when the file's metadata or modification time cannot be
/// read, so an unreadable or vanished file counts as stale. A modification
/// time in the future (clock skew, restored files) counts as fresh and
/// returns `false`; the previous `Duration` subtraction panicked in that case.
fn file_older_than(path: &Path, max_age: Duration) -> bool {
    let Ok(modified) = std::fs::metadata(path).and_then(|meta| meta.modified()) else {
        return true;
    };
    // `duration_since` yields `Err` when `modified` is later than now, which
    // is exactly the "not old at all" case.
    SystemTime::now()
        .duration_since(modified)
        .is_ok_and(|age| age >= max_age)
}
616
617fn init_snapshot_repo_if_needed(
618    snapshot_dir: &Path,
619    git_dir: &Path,
620    work_tree: &Path,
621) -> io::Result<()> {
622    if snapshot_repo_initialized(git_dir, work_tree) {
623        return Ok(());
624    }
625
626    let lock = snapshot_init_lock(snapshot_dir);
627    let _guard = lock
628        .lock()
629        .map_err(|_| io_other("snapshot init mutex poisoned"))?;
630
631    if snapshot_repo_initialized(git_dir, work_tree) {
632        return Ok(());
633    }
634
635    let file_lock = acquire_snapshot_init_file_lock(snapshot_dir)?;
636    let init_result = init_snapshot_repo_locked(snapshot_dir, git_dir, work_tree);
637    let _ = std::fs::remove_file(&file_lock);
638    init_result
639}
640
/// Initialize the side repo under `snapshot_dir`. Called by
/// `init_snapshot_repo_if_needed` after it has acquired the init lock file.
///
/// Cleans up leftovers from earlier failed attempts, runs `git init`, and
/// applies the side repo's config. If `git init` fails with what looks like
/// lock contention, the leftovers are cleaned again and `git init` is retried
/// once. Returns an error carrying git's message when initialization still
/// fails.
fn init_snapshot_repo_locked(
    snapshot_dir: &Path,
    git_dir: &Path,
    work_tree: &Path,
) -> io::Result<()> {
    // Another process may have completed the init while we waited for the
    // lock file.
    if snapshot_repo_initialized(git_dir, work_tree) {
        return Ok(());
    }

    // Clear debris in order: stray bare-repo files next to the git dir, stale
    // lock files inside it, and finally the git dir itself if git still does
    // not accept it.
    remove_legacy_bare_repo_artifacts(snapshot_dir, git_dir);
    remove_stale_git_locks(git_dir);
    remove_broken_git_dir(git_dir, work_tree)?;

    let init = try_git_init(snapshot_dir)?;
    if init.status.success() {
        apply_snapshot_repo_config(git_dir, work_tree);
        return Ok(());
    }

    // `git init` reported failure, but the repo may be usable anyway; capture
    // the message first, then re-probe.
    let err = git_init_error(&init);
    if snapshot_repo_initialized(git_dir, work_tree) {
        apply_snapshot_repo_config(git_dir, work_tree);
        return Ok(());
    }

    // Lock-contention failures get exactly one cleanup-and-retry.
    if is_git_config_lock_error(&err) {
        remove_stale_git_locks(git_dir);
        remove_broken_git_dir(git_dir, work_tree)?;
        let retry = try_git_init(snapshot_dir)?;
        if retry.status.success() || snapshot_repo_initialized(git_dir, work_tree) {
            apply_snapshot_repo_config(git_dir, work_tree);
            return Ok(());
        }
        return Err(io_other(format!(
            "git init failed: {}",
            git_init_error(&retry)
        )));
    }

    Err(io_other(format!("git init failed: {err}")))
}
682
/// Run `git` with explicit `--git-dir` and `--work-tree` followed by `args`,
/// waiting for it to finish and returning its captured output. An `Err`
/// means the process could not be run; a non-zero exit status is reported
/// through the returned `Output`.
fn run_git(git_dir: &Path, work_tree: &Path, args: &[&str]) -> io::Result<Output> {
    let mut git = Command::new("git");
    git.arg("--git-dir").arg(git_dir);
    git.arg("--work-tree").arg(work_tree);
    git.args(args);
    git.output()
}
692
/// Build an `io::Error` of kind `Other` whose message is `msg`.
fn io_other(msg: impl Into<String>) -> io::Error {
    let text: String = msg.into();
    io::Error::other(text)
}
696
697fn unsafe_workspace_snapshot_reason(workspace: &Path, home: Option<&Path>) -> Option<&'static str> {
698    let workspace = normalize_path_for_safety(workspace);
699    if is_filesystem_root(&workspace) {
700        return Some("filesystem root");
701    }
702
703    if is_home_directory(&workspace, home) {
704        return Some("home directory");
705    }
706
707    let home = home.map(normalize_path_for_safety)?;
708    if workspace.parent() == Some(home.as_path()) {
709        let name = workspace.file_name().and_then(|name| name.to_str());
710        if matches!(
711            name,
712            Some(
713                "Desktop" | "Documents" | "Downloads" | "Library" | "Movies" | "Music" | "Pictures"
714            )
715        ) {
716            return Some("home collection directory");
717        }
718    }
719
720    None
721}
722
/// Canonicalize `path` for comparison purposes, falling back to the path as
/// given when canonicalization fails (for instance, when it does not exist).
fn normalize_path_for_safety(path: &Path) -> PathBuf {
    match path.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
726
/// A path counts as a filesystem root when it has no parent component.
fn is_filesystem_root(path: &Path) -> bool {
    match path.parent() {
        None => true,
        Some(_) => false,
    }
}
730
/// Check whether `work_tree` is the home directory itself. `home` is
/// canonicalized when possible before comparing; an unknown home directory
/// never matches.
fn is_home_directory(work_tree: &Path, home: Option<&Path>) -> bool {
    home.is_some_and(|home| {
        let resolved = home.canonicalize().unwrap_or_else(|_| home.to_path_buf());
        resolved.as_path() == work_tree
    })
}
739
/// Split NUL-separated git output (e.g. `git ls-tree -z`) into a set of
/// paths, skipping empty chunks.
///
/// On Unix the raw bytes of each name are preserved, so file names that are
/// not valid UTF-8 still match the files on disk. Elsewhere names are decoded
/// as UTF-8 with lossy replacement.
fn parse_nul_paths(bytes: &[u8]) -> HashSet<PathBuf> {
    bytes
        .split(|b| *b == 0)
        .filter(|chunk| !chunk.is_empty())
        .map(path_from_git_bytes)
        .collect()
}

/// Turn one raw path emitted by git into a `PathBuf` without altering bytes.
#[cfg(unix)]
fn path_from_git_bytes(raw: &[u8]) -> PathBuf {
    use std::os::unix::ffi::OsStrExt;
    PathBuf::from(std::ffi::OsStr::from_bytes(raw))
}

/// Turn one raw path emitted by git into a `PathBuf` (lossy UTF-8 decoding).
#[cfg(not(unix))]
fn path_from_git_bytes(raw: &[u8]) -> PathBuf {
    PathBuf::from(String::from_utf8_lossy(raw).into_owned())
}
747
/// Accept only non-empty paths made purely of normal components — no root,
/// prefix, `.` or `..` — so that joining onto the workspace cannot escape it.
fn is_safe_relative_path(path: &Path) -> bool {
    if path.as_os_str().is_empty() {
        return false;
    }
    !path
        .components()
        .any(|part| !matches!(part, Component::Normal(_)))
}
754
755#[cfg(test)]
756mod tests {
757    use super::*;
758    use crate::snapshot::paths::snapshot_git_dir;
759    use crate::test_support::lock_test_env;
760    use std::sync::MutexGuard;
761    use tempfile::tempdir;
762
763    /// Holds the home directory pinned to a tempdir for the lifetime of a test. Also
764    /// owns the process-wide env-var mutex so tests across modules
765    /// don't trample each other's home env vars.
766    pub(super) struct ScopedHome {
767        prev_vars: Vec<(&'static str, Option<std::ffi::OsString>)>,
768        _guard: MutexGuard<'static, ()>,
769    }
770    impl Drop for ScopedHome {
771        fn drop(&mut self) {
772            // SAFETY: process-wide lock still held.
773            unsafe {
774                for (key, prev) in self.prev_vars.drain(..) {
775                    match prev {
776                        Some(value) => std::env::set_var(key, value),
777                        None => std::env::remove_var(key),
778                    }
779                }
780            }
781        }
782    }
783    pub(super) fn scoped_home(home: &Path) -> ScopedHome {
784        let guard = lock_test_env();
785        let prev_vars = ["HOME", "USERPROFILE", "HOMEDRIVE", "HOMEPATH"]
786            .into_iter()
787            .map(|key| (key, std::env::var_os(key)))
788            .collect();
789        // SAFETY: serialised by the global env lock.
790        unsafe {
791            std::env::set_var("HOME", home);
792            std::env::set_var("USERPROFILE", home);
793            std::env::remove_var("HOMEDRIVE");
794            std::env::remove_var("HOMEPATH");
795        }
796        ScopedHome {
797            prev_vars,
798            _guard: guard,
799        }
800    }
801
802    /// Build a side-repo whose snapshot dir lives under the same
803    /// tempdir we're using for `HOME` — so the inner `dirs::home_dir()`
804    /// lookup stays inside our sandbox. Returns the guard alongside so
805    /// the caller can keep HOME pinned for the rest of the test.
806    fn make_repo(tmp: &Path) -> (SnapshotRepo, ScopedHome) {
807        let workspace = tmp.join("workspace");
808        std::fs::create_dir_all(&workspace).unwrap();
809        let guard = scoped_home(tmp);
810        let repo = SnapshotRepo::open_or_init(&workspace).expect("open_or_init");
811        (repo, guard)
812    }
813
814    #[test]
815    fn snapshot_creates_commit_in_side_repo_only() {
816        let tmp = tempdir().unwrap();
817        let (repo, _home) = make_repo(tmp.path());
818        std::fs::write(repo.work_tree().join("a.txt"), b"alpha").unwrap();
819
820        let id = repo.snapshot("pre-turn:1").expect("snapshot");
821        assert_eq!(id.as_str().len(), 40);
822
823        let list = repo.list(10).expect("list");
824        assert_eq!(list.len(), 1);
825        assert_eq!(list[0].label, "pre-turn:1");
826
827        // The user's workspace must NOT have a real `.git` because we
828        // never created one in their workspace — only in the side dir.
829        assert!(!repo.work_tree().join(".git").exists());
830    }
831
832    #[test]
833    fn restore_reverts_workspace_files() {
834        let tmp = tempdir().unwrap();
835        let (repo, _home) = make_repo(tmp.path());
836        let f = repo.work_tree().join("file.txt");
837
838        std::fs::write(&f, b"original").unwrap();
839        let id = repo.snapshot("pre-turn:1").expect("snapshot");
840
841        std::fs::write(&f, b"clobbered").unwrap();
842        repo.snapshot("post-turn:1").expect("snapshot 2");
843
844        repo.restore(&id).expect("restore");
845        let after = std::fs::read_to_string(&f).unwrap();
846        assert_eq!(after, "original");
847    }
848
849    #[test]
850    fn restore_removes_files_added_after_target_snapshot() {
851        let tmp = tempdir().unwrap();
852        let (repo, _home) = make_repo(tmp.path());
853        let original = repo.work_tree().join("original.txt");
854        let added = repo.work_tree().join("added.txt");
855
856        std::fs::write(&original, b"original").unwrap();
857        let id = repo.snapshot("pre-turn:1").expect("snapshot");
858
859        std::fs::write(&added, b"new file").unwrap();
860        repo.snapshot("post-turn:1").expect("snapshot 2");
861
862        repo.restore(&id).expect("restore");
863        assert!(original.exists());
864        assert!(!added.exists(), "restore must remove tracked added files");
865    }
866
/// Verifies that taking and restoring side-repo snapshots leaves the `HEAD`
/// of the user's own git repository (living in the same workspace) untouched,
/// while the working-tree file is rolled back to the snapshotted contents.
#[test]
fn snapshot_and_restore_do_not_move_user_git_head() {
    let tmp = tempdir().unwrap();
    let workspace = tmp.path().join("workspace");
    std::fs::create_dir_all(&workspace).unwrap();

    // Runs `git -C <workspace> <args…>` against the user's repository and
    // returns its stdout. A non-zero exit fails the test right away: if the
    // setup silently failed, both `rev-parse HEAD` calls would print nothing
    // and the HEAD comparison below would pass vacuously.
    let user_git = |args: &[&str]| -> Vec<u8> {
        let output = Command::new("git")
            .arg("-C")
            .arg(&workspace)
            .args(args)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {args:?} failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        output.stdout
    };

    // Build a user repository with exactly one commit.
    user_git(&["init", "--quiet"]);
    std::fs::write(workspace.join("tracked.txt"), b"committed").unwrap();
    user_git(&["add", "tracked.txt"]);
    user_git(&[
        "-c",
        "user.name=user",
        "-c",
        "user.email=user@example.test",
        // Keep the setup commit independent of any global signing config.
        "-c",
        "commit.gpgsign=false",
        "commit",
        "--quiet",
        "-m",
        "init",
    ]);
    let user_head_before = user_git(&["rev-parse", "HEAD"]);

    // Snapshot two dirty states through the side repo, then roll back to the
    // first one.
    let _home = scoped_home(tmp.path());
    let repo = SnapshotRepo::open_or_init(&workspace).unwrap();
    std::fs::write(workspace.join("tracked.txt"), b"dirty-before").unwrap();
    let id = repo.snapshot("pre-turn:1").unwrap();
    std::fs::write(workspace.join("tracked.txt"), b"dirty-after").unwrap();
    repo.snapshot("post-turn:1").unwrap();
    repo.restore(&id).unwrap();

    // The user's HEAD must be unchanged, and the file must hold the contents
    // captured by the first snapshot.
    let user_head_after = user_git(&["rev-parse", "HEAD"]);
    assert_eq!(user_head_after, user_head_before);
    assert_eq!(
        std::fs::read_to_string(workspace.join("tracked.txt")).unwrap(),
        "dirty-before"
    );
}
929
/// `list(n)` returns at most `n` snapshots, with the most recent one first.
#[test]
fn list_respects_limit() {
    let scratch = tempdir().unwrap();
    let (repo, _home) = make_repo(scratch.path());
    let target = repo.work_tree().join("f.txt");

    // Five snapshots, each with distinct file contents and label.
    (0..5).for_each(|turn| {
        std::fs::write(&target, format!("v{turn}")).unwrap();
        repo.snapshot(&format!("turn:{turn}")).unwrap();
    });

    let newest = repo.list(3).unwrap();
    assert_eq!(newest.len(), 3);
    // The head of the list is the last snapshot taken.
    assert_eq!(newest[0].label, "turn:4");
}
943
/// Pruning with a zero max-age removes the existing snapshot, and a snapshot
/// taken afterwards is the only entry returned by `list`.
#[test]
fn prune_drops_snapshots_older_than_threshold() {
    let scratch = tempdir().unwrap();
    let (repo, _home) = make_repo(scratch.path());
    let tracked = repo.work_tree().join("f.txt");

    std::fs::write(&tracked, "v0").unwrap();
    repo.snapshot("turn:0").unwrap();

    // Sleep past a full second so the snapshot's commit timestamp is strictly
    // older than the "now" used by the prune call; a same-second comparison
    // would make the assertion below flaky.
    let settle = Duration::from_millis(1100);
    std::thread::sleep(settle);

    let max_age = Duration::from_secs(0);
    let removed = repo.prune_older_than(max_age).unwrap();
    assert!(removed >= 1, "expected at least 1 pruned, got {removed}");

    // With everything pruned, a new snapshot must be the sole history entry.
    std::fs::write(&tracked, "v1").unwrap();
    repo.snapshot("turn:1").unwrap();
    let remaining = repo.list(10).unwrap();
    assert_eq!(remaining.len(), 1);
    assert_eq!(remaining[0].label, "turn:1");
}
967
/// A file matched by the workspace's own `.gitignore` must not appear in the
/// snapshot tree, while a non-ignored sibling must.
#[test]
fn snapshot_respects_workspace_gitignore() {
    let scratch = tempdir().unwrap();
    let (repo, _home) = make_repo(scratch.path());
    let root = repo.work_tree();

    std::fs::write(root.join(".gitignore"), "ignored.txt\n").unwrap();
    std::fs::write(root.join("ignored.txt"), b"secret").unwrap();
    std::fs::write(root.join("kept.txt"), b"public").unwrap();

    let id = repo.snapshot("pre-turn:1").expect("snapshot");

    // List every path recorded in the snapshot's commit.
    let ls_args = ["ls-tree", "-r", "--name-only", id.as_str()];
    let ls = run_git(repo.git_dir(), root, &ls_args).expect("ls-tree");
    let names = String::from_utf8_lossy(&ls.stdout);

    assert!(names.contains("kept.txt"), "kept.txt missing: {names}");
    let ignored_present = names.contains("ignored.txt");
    assert!(
        !ignored_present,
        "ignored.txt should not be in snapshot: {names}",
    );
}
992
/// Using the home directory itself as the workspace is reported as unsafe
/// with the reason `"home directory"`.
#[test]
fn unsafe_workspace_rejects_home_directory_workspace() {
    let scratch = tempdir().unwrap();
    let home = scratch.path();

    let reason = unsafe_workspace_snapshot_reason(home, Some(home));
    assert_eq!(reason, Some("home directory"));
}
1003
/// A `Desktop` directory directly under home is reported as unsafe with the
/// reason `"home collection directory"`.
#[test]
fn unsafe_workspace_rejects_home_collection_directories() {
    let scratch = tempdir().unwrap();
    let home = scratch.path();
    let desktop = home.join("Desktop");
    std::fs::create_dir_all(&desktop).unwrap();

    let reason = unsafe_workspace_snapshot_reason(&desktop, Some(home));
    assert_eq!(reason, Some("home collection directory"));
}
1016
/// A nested project directory under home (`code/project`) yields no
/// unsafe-workspace reason.
#[test]
fn unsafe_workspace_allows_project_directories_under_home() {
    let scratch = tempdir().unwrap();
    let home = scratch.path();
    let workspace = home.join("code").join("project");
    std::fs::create_dir_all(&workspace).unwrap();

    let reason = unsafe_workspace_snapshot_reason(&workspace, Some(home));
    assert_eq!(reason, None);
}
1029
/// The side repo's `info/exclude` carries the built-in patterns, and files
/// matching them (`node_modules/`, `.next/`, `*.wasm`) stay out of the
/// snapshot tree while ordinary source files are recorded.
#[test]
fn snapshot_respects_builtin_excludes() {
    let scratch = tempdir().unwrap();
    let (repo, _home) = make_repo(scratch.path());
    let root = repo.work_tree();

    // Lay out the directories first, then populate them.
    for dir in ["node_modules/pkg", ".next/cache", "src"] {
        std::fs::create_dir_all(root.join(dir)).unwrap();
    }
    std::fs::write(root.join("node_modules/pkg/index.js"), b"generated").unwrap();
    std::fs::write(root.join(".next/cache/chunk.bin"), b"generated").unwrap();
    std::fs::write(root.join("debug.wasm"), b"binary").unwrap();
    std::fs::write(root.join("src/main.rs"), b"fn main() {}").unwrap();

    // The exclude file written into the side repo must list each pattern.
    let exclude_path = repo.git_dir().join("info/exclude");
    let excludes = std::fs::read_to_string(exclude_path).unwrap();
    for pattern in ["node_modules/", ".next/", "*.wasm"] {
        assert!(excludes.contains(pattern));
    }

    let id = repo.snapshot("pre-turn:1").expect("snapshot");
    let ls_args = ["ls-tree", "-r", "--name-only", id.as_str()];
    let ls = run_git(repo.git_dir(), root, &ls_args).expect("ls-tree");
    let names = String::from_utf8_lossy(&ls.stdout);

    // Only the real source file may show up in the recorded tree.
    assert!(
        names.contains("src/main.rs"),
        "src/main.rs missing: {names}"
    );
    let has_node_modules = names.contains("node_modules");
    assert!(
        !has_node_modules,
        "node_modules should not be in snapshot: {names}",
    );
    let has_next = names.contains(".next");
    assert!(!has_next, ".next should not be in snapshot: {names}",);
    let has_wasm = names.contains("debug.wasm");
    assert!(
        !has_wasm,
        "binary artifacts should not be in snapshot: {names}",
    );
}
1076
/// A leftover `config.lock` inside the side repo's git dir must not stop
/// `open_or_init`: afterwards `config` exists and the lock file is gone.
#[test]
fn open_or_init_recovers_from_stale_config_lock() {
    let scratch = tempdir().unwrap();
    let workspace = scratch.path().join("workspace");
    std::fs::create_dir_all(&workspace).unwrap();
    let home_guard = scoped_home(scratch.path());

    // Pre-create the git dir and plant a stale lock file in it.
    let git_dir = snapshot_git_dir(&workspace);
    let parent = git_dir.parent().unwrap();
    std::fs::create_dir_all(parent).unwrap();
    std::fs::create_dir_all(&git_dir).unwrap();
    let stale_lock = git_dir.join("config.lock");
    std::fs::write(&stale_lock, b"stale").unwrap();

    let repo = SnapshotRepo::open_or_init(&workspace).expect("open_or_init");
    let config = repo.git_dir().join("config");
    let lock = repo.git_dir().join("config.lock");
    assert!(config.exists());
    assert!(!lock.exists());
    drop(home_guard);
}
1094
/// Opening a workspace whose contents exceed the configured size cap fails
/// with `InvalidInput` and an error message that mentions `max_workspace_gb`.
#[test]
fn open_or_init_rejects_workspace_over_max_gb() {
    let scratch = tempdir().unwrap();
    let workspace = scratch.path().join("workspace");
    std::fs::create_dir_all(&workspace).unwrap();
    let payload = vec![0u8; 2048];
    std::fs::write(workspace.join("big.bin"), payload).unwrap();

    // The 2 KiB file is far above the tiny cap passed here.
    let Err(err) = SnapshotRepo::open_or_init_with_max_gb(&workspace, 0.000001) else {
        panic!("should reject huge workspace");
    };
    assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
    let message = err.to_string();
    assert!(message.contains("max_workspace_gb"), "{}", err);
}
1107
/// Opening the side repo twice for the same workspace must not panic.
#[test]
fn open_or_init_is_idempotent() {
    let scratch = tempdir().unwrap();
    let (first_repo, first_home) = make_repo(scratch.path());
    // Release the first repo and its HOME guard before opening again.
    // `make_repo` returns a fresh guard each time; the original note on this
    // test warns that acquiring it while another is alive would deadlock.
    drop(first_repo);
    drop(first_home);
    let (_second_repo, _second_home) = make_repo(scratch.path());
}
1118
/// `is_home_directory` recognises the canonicalized home path when given the
/// non-canonical home, rejects a child directory, and rejects everything when
/// no home is supplied.
#[test]
fn home_directory_guard_matches_canonical_paths() {
    let scratch = tempdir().unwrap();
    let home = scratch.path();
    let workspace = home.join("workspace");
    std::fs::create_dir_all(&workspace).unwrap();

    let canonical_home = home.canonicalize().unwrap();
    let canonical_workspace = workspace.canonicalize().unwrap();

    let home_matches = is_home_directory(&canonical_home, Some(home));
    assert!(home_matches);
    let child_matches = is_home_directory(&canonical_workspace, Some(home));
    assert!(!child_matches);
    let matches_without_home = is_home_directory(&canonical_home, None);
    assert!(!matches_without_home);
}
1132}