Skip to main content

maw/backend/
copy.rs

//! Plain recursive-copy workspace backend (universal fallback).
//!
//! Creates workspaces by extracting the epoch's git tree into a fresh
//! directory using `git archive | tar -x`. No `CoW`, no overlayfs — works on
//! any filesystem and any platform.
//!
//! # Directory layout
//!
//! ```text
//! repo-root/
//! └── ws/
//!     └── <name>/         ← workspace (full copy of epoch tree)
//!         └── .maw-epoch  ← stores the base epoch OID (40 hex chars + newline)
//! ```
//!
//! # Performance note
//!
//! Every workspace create is an O(repo-size) operation. For repos with fewer
//! than 30k files this is acceptable; for larger repos prefer the `reflink`
//! or `overlay` backend.
21
22use std::collections::HashSet;
23use std::fmt;
24use std::path::{Path, PathBuf};
25use std::process::{Command, Stdio};
26
27use super::{SnapshotResult, WorkspaceBackend, WorkspaceStatus};
28use crate::model::types::{EpochId, WorkspaceId, WorkspaceInfo, WorkspaceMode, WorkspaceState};
29
30// ---------------------------------------------------------------------------
31// Constants
32// ---------------------------------------------------------------------------
33
/// Name of the hidden marker file stored at the root of every workspace.
///
/// The file holds the workspace's base epoch as the string returned by
/// `EpochId::as_str`, followed by a single newline; readers trim surrounding
/// whitespace before parsing. Snapshots skip this path so the marker never
/// shows up as an added or modified file.
const EPOCH_FILE: &str = ".maw-epoch";
39
40// ---------------------------------------------------------------------------
41// Error type
42// ---------------------------------------------------------------------------
43
/// Failure modes of the plain-copy workspace backend.
#[derive(Debug)]
pub enum CopyBackendError {
    /// A filesystem or process-spawning operation returned an I/O error.
    Io(std::io::Error),
    /// An external command (`git archive`, `tar`) ran but reported failure.
    Command {
        /// Human-readable rendering of the command that was run.
        command: String,
        /// Captured standard error of the command; may be empty.
        stderr: String,
        /// Exit code of the command, or `None` when no code is available.
        exit_code: Option<i32>,
    },
    /// No workspace with the requested name exists.
    NotFound { name: String },
    /// The workspace directory exists but has no `.maw-epoch` metadata file.
    MissingEpochFile { workspace: String },
    /// The contents of `.maw-epoch` could not be parsed as an epoch ID.
    InvalidEpochFile { workspace: String, reason: String },
}
62
63impl fmt::Display for CopyBackendError {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        match self {
66            Self::Io(e) => write!(f, "I/O error: {e}"),
67            Self::Command {
68                command,
69                stderr,
70                exit_code,
71            } => {
72                write!(f, "`{command}` failed")?;
73                if let Some(code) = exit_code {
74                    write!(f, " (exit code {code})")?;
75                }
76                if !stderr.is_empty() {
77                    write!(f, ": {stderr}")?;
78                }
79                Ok(())
80            }
81            Self::NotFound { name } => write!(f, "workspace '{name}' not found"),
82            Self::MissingEpochFile { workspace } => {
83                write!(
84                    f,
85                    "workspace '{workspace}' is missing {EPOCH_FILE}; \
86                     the workspace may be corrupted"
87                )
88            }
89            Self::InvalidEpochFile { workspace, reason } => {
90                write!(
91                    f,
92                    "workspace '{workspace}' has an invalid {EPOCH_FILE}: {reason}"
93                )
94            }
95        }
96    }
97}
98
99impl std::error::Error for CopyBackendError {
100    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
101        match self {
102            Self::Io(e) => Some(e),
103            _ => None,
104        }
105    }
106}
107
108impl From<std::io::Error> for CopyBackendError {
109    fn from(e: std::io::Error) -> Self {
110        Self::Io(e)
111    }
112}
113
114// ---------------------------------------------------------------------------
115// CopyBackend
116// ---------------------------------------------------------------------------
117
/// A workspace backend that extracts epoch trees via `git archive`.
///
/// Each workspace is a plain copy of the repository tree at the base epoch,
/// stored under `<root>/ws/<name>`. Status is derived from `git diff`, and
/// snapshots are computed by walking the workspace directory and comparing it
/// against the base epoch's tree (`git ls-tree` / `git cat-file`).
///
/// # Thread safety
///
/// `CopyBackend` is `Send + Sync`: its only field is a `PathBuf`, and all
/// workspace state lives on the filesystem.
#[derive(Debug, Clone)]
pub struct CopyBackend {
    /// Absolute path to the repository root (contains `.git/`, `ws/`).
    root: PathBuf,
}
131
132impl CopyBackend {
133    /// Create a new `CopyBackend` rooted at `root`.
134    #[must_use]
135    pub const fn new(root: PathBuf) -> Self {
136        Self { root }
137    }
138
139    // -----------------------------------------------------------------------
140    // Helpers
141    // -----------------------------------------------------------------------
142
143    fn workspaces_dir(&self) -> PathBuf {
144        self.root.join("ws")
145    }
146
147    fn read_epoch_file(ws_path: &Path, name: &str) -> Result<EpochId, CopyBackendError> {
148        let epoch_file = ws_path.join(EPOCH_FILE);
149        if !epoch_file.exists() {
150            return Err(CopyBackendError::MissingEpochFile {
151                workspace: name.to_owned(),
152            });
153        }
154        let raw = std::fs::read_to_string(&epoch_file)?;
155        let oid_str = raw.trim();
156        EpochId::new(oid_str).map_err(|e| CopyBackendError::InvalidEpochFile {
157            workspace: name.to_owned(),
158            reason: e.to_string(),
159        })
160    }
161
162    fn write_epoch_file(ws_path: &Path, epoch: &EpochId) -> Result<(), CopyBackendError> {
163        let epoch_file = ws_path.join(EPOCH_FILE);
164        std::fs::write(&epoch_file, format!("{}\n", epoch.as_str()))?;
165        Ok(())
166    }
167
168    /// Extract the epoch's tree into `dest` using `git archive | tar -x`.
169    ///
170    /// This creates a full copy of all tracked files at the epoch commit.
171    fn extract_epoch(&self, epoch: &EpochId, dest: &Path) -> Result<(), CopyBackendError> {
172        std::fs::create_dir_all(dest)?;
173
174        // Run `git archive <oid> | tar -x -C <dest>`
175        let mut archive = Command::new("git")
176            .args(["archive", epoch.as_str()])
177            .current_dir(&self.root)
178            .stdout(Stdio::piped())
179            .stderr(Stdio::piped())
180            .spawn()
181            .map_err(CopyBackendError::Io)?;
182
183        let archive_stdout = archive.stdout.take().expect("piped stdout");
184
185        let tar_status = Command::new("tar")
186            .args(["-x", "-C"])
187            .arg(dest)
188            .stdin(archive_stdout)
189            .stdout(Stdio::null())
190            .stderr(Stdio::piped())
191            .status()
192            .map_err(CopyBackendError::Io)?;
193
194        let archive_output = archive.wait_with_output().map_err(CopyBackendError::Io)?;
195
196        if !archive_output.status.success() {
197            let stderr = String::from_utf8_lossy(&archive_output.stderr)
198                .trim()
199                .to_owned();
200            return Err(CopyBackendError::Command {
201                command: format!("git archive {}", epoch.as_str()),
202                stderr,
203                exit_code: archive_output.status.code(),
204            });
205        }
206
207        if !tar_status.success() {
208            return Err(CopyBackendError::Command {
209                command: format!("tar -x -C {}", dest.display()),
210                stderr: String::new(),
211                exit_code: tar_status.code(),
212            });
213        }
214
215        Ok(())
216    }
217}
218
219// ---------------------------------------------------------------------------
220// WorkspaceBackend impl
221// ---------------------------------------------------------------------------
222
223impl WorkspaceBackend for CopyBackend {
224    type Error = CopyBackendError;
225
226    fn create(&self, name: &WorkspaceId, epoch: &EpochId) -> Result<WorkspaceInfo, Self::Error> {
227        let ws_path = self.workspace_path(name);
228
229        // Idempotency: workspace with correct epoch already exists.
230        if ws_path.exists() {
231            if let Ok(existing_epoch) = Self::read_epoch_file(&ws_path, name.as_str())
232                && existing_epoch == *epoch
233            {
234                return Ok(WorkspaceInfo {
235                    id: name.clone(),
236                    path: ws_path,
237                    epoch: epoch.clone(),
238                    state: WorkspaceState::Active,
239                    mode: WorkspaceMode::default(),
240                commits_ahead: 0,
241                });
242            }
243            // Partial or mismatched workspace — remove and recreate.
244            std::fs::remove_dir_all(&ws_path)?;
245        }
246
247        std::fs::create_dir_all(self.workspaces_dir())?;
248
249        // Extract the epoch tree into the workspace directory.
250        self.extract_epoch(epoch, &ws_path)?;
251
252        // Write the epoch marker.
253        Self::write_epoch_file(&ws_path, epoch)?;
254
255        Ok(WorkspaceInfo {
256            id: name.clone(),
257            path: ws_path,
258            epoch: epoch.clone(),
259            state: WorkspaceState::Active,
260            mode: WorkspaceMode::default(),
261        commits_ahead: 0,
262        })
263    }
264
265    fn destroy(&self, name: &WorkspaceId) -> Result<(), Self::Error> {
266        let ws_path = self.workspace_path(name);
267        if !ws_path.exists() {
268            return Ok(()); // idempotent
269        }
270        std::fs::remove_dir_all(&ws_path)?;
271        Ok(())
272    }
273
274    fn list(&self) -> Result<Vec<WorkspaceInfo>, Self::Error> {
275        let ws_dir = self.workspaces_dir();
276        if !ws_dir.exists() {
277            return Ok(vec![]);
278        }
279
280        let mut workspaces = Vec::new();
281        for entry in std::fs::read_dir(&ws_dir)? {
282            let entry = entry?;
283            let path = entry.path();
284            if !path.is_dir() {
285                continue;
286            }
287            let name_str = entry.file_name().to_string_lossy().into_owned();
288            let Ok(ws_id) = WorkspaceId::new(&name_str) else {
289                continue;
290            };
291            let Ok(epoch) = Self::read_epoch_file(&path, &name_str) else {
292                continue; // skip corrupted entries
293            };
294            workspaces.push(WorkspaceInfo {
295                id: ws_id,
296                path,
297                epoch,
298                state: WorkspaceState::Active,
299                mode: WorkspaceMode::default(),
300            commits_ahead: 0,
301            });
302        }
303        Ok(workspaces)
304    }
305
306    fn status(&self, name: &WorkspaceId) -> Result<WorkspaceStatus, Self::Error> {
307        let ws_path = self.workspace_path(name);
308        if !ws_path.exists() {
309            return Err(CopyBackendError::NotFound {
310                name: name.as_str().to_owned(),
311            });
312        }
313
314        let base_epoch = Self::read_epoch_file(&ws_path, name.as_str())?;
315
316        // Use `git diff --name-only` to find modified/added/deleted files.
317        let output = Command::new("git")
318            .args([
319                "diff",
320                "--name-only",
321                base_epoch.as_str(),
322                "--",
323                ws_path.to_str().unwrap_or(""),
324            ])
325            .current_dir(&self.root)
326            .output()
327            .map_err(CopyBackendError::Io)?;
328
329        let dirty_files: Vec<PathBuf> = if output.status.success() {
330            String::from_utf8_lossy(&output.stdout)
331                .lines()
332                .filter(|l| !l.is_empty())
333                .map(PathBuf::from)
334                .collect()
335        } else {
336            vec![]
337        };
338
339        // Determine staleness: check if current epoch ref is ahead of base epoch.
340        let is_stale = self.check_stale(&base_epoch);
341
342        Ok(WorkspaceStatus::new(base_epoch, dirty_files, is_stale))
343    }
344
345    fn snapshot(&self, name: &WorkspaceId) -> Result<SnapshotResult, Self::Error> {
346        let ws_path = self.workspace_path(name);
347        if !ws_path.exists() {
348            return Err(CopyBackendError::NotFound {
349                name: name.as_str().to_owned(),
350            });
351        }
352
353        let base_epoch = Self::read_epoch_file(&ws_path, name.as_str())?;
354
355        // Walk the workspace and compare against the base epoch tree.
356        let tracked = self.tracked_files_at_epoch(&base_epoch);
357        let workspace_files = Self::walk_workspace(&ws_path);
358
359        let is_excluded = |name: &str| name == EPOCH_FILE;
360
361        let mut added = Vec::new();
362        let mut modified = Vec::new();
363        let mut deleted = Vec::new();
364
365        // Check for modified and deleted tracked files.
366        for rel_path in &tracked {
367            let rel = std::path::Path::new(rel_path);
368            let name_str = rel_path.as_str();
369            if is_excluded(name_str) {
370                continue;
371            }
372            let abs = ws_path.join(rel);
373            if !abs.exists() {
374                deleted.push(rel.to_path_buf());
375            } else if self.file_differs_from_epoch(rel, &base_epoch) {
376                modified.push(rel.to_path_buf());
377            }
378        }
379
380        // Check for untracked (added) files.
381        let tracked_set: HashSet<&str> = tracked.iter().map(std::string::String::as_str).collect();
382        for ws_rel in &workspace_files {
383            let ws_rel_str = ws_rel.to_string_lossy();
384            if is_excluded(ws_rel_str.as_ref()) || tracked_set.contains(ws_rel_str.as_ref()) {
385                continue;
386            }
387            added.push(ws_rel.clone());
388        }
389
390        Ok(SnapshotResult::new(added, modified, deleted))
391    }
392
393    fn workspace_path(&self, name: &WorkspaceId) -> PathBuf {
394        self.workspaces_dir().join(name.as_str())
395    }
396
397    fn exists(&self, name: &WorkspaceId) -> bool {
398        self.workspace_path(name).is_dir()
399    }
400}
401
402impl CopyBackend {
403    /// Check if the workspace's base epoch is behind the current epoch ref.
404    fn check_stale(&self, base_epoch: &EpochId) -> bool {
405        let output = Command::new("git")
406            .args(["rev-parse", "refs/manifold/epoch/current"])
407            .current_dir(&self.root)
408            .stdout(Stdio::piped())
409            .stderr(Stdio::null())
410            .output();
411
412        if let Ok(out) = output
413            && out.status.success()
414        {
415            let current = String::from_utf8_lossy(&out.stdout).trim().to_owned();
416            return current != base_epoch.as_str();
417        }
418        false
419    }
420
421    /// List all files tracked at the given epoch via `git ls-tree`.
422    fn tracked_files_at_epoch(&self, epoch: &EpochId) -> Vec<String> {
423        let output = Command::new("git")
424            .args(["ls-tree", "-r", "--name-only", epoch.as_str()])
425            .current_dir(&self.root)
426            .output();
427
428        match output {
429            Ok(out) if out.status.success() => String::from_utf8_lossy(&out.stdout)
430                .lines()
431                .filter(|l| !l.is_empty())
432                .map(str::to_owned)
433                .collect(),
434            _ => vec![],
435        }
436    }
437
438    /// Walk the workspace directory, returning paths relative to `ws_path`.
439    fn walk_workspace(ws_path: &Path) -> Vec<PathBuf> {
440        let mut files = Vec::new();
441        Self::walk_dir(ws_path, ws_path, &mut files);
442        files
443    }
444
445    fn walk_dir(base: &Path, current: &Path, files: &mut Vec<PathBuf>) {
446        let Ok(entries) = std::fs::read_dir(current) else {
447            return;
448        };
449        for entry in entries.flatten() {
450            let path = entry.path();
451            if let Ok(meta) = entry.metadata() {
452                if meta.is_dir() {
453                    Self::walk_dir(base, &path, files);
454                } else if let Ok(rel) = path.strip_prefix(base) {
455                    files.push(rel.to_path_buf());
456                }
457            }
458        }
459    }
460
461    /// Check if a tracked file in the workspace differs from the epoch version.
462    fn file_differs_from_epoch(&self, rel: &Path, epoch: &EpochId) -> bool {
463        let blob_path = format!("{}:{}", epoch.as_str(), rel.display());
464        let output = Command::new("git")
465            .args(["cat-file", "blob", &blob_path])
466            .current_dir(&self.root)
467            .output();
468
469        let Ok(out) = output else { return false };
470        if !out.status.success() {
471            return false;
472        }
473        out.stdout != std::fs::read(rel).unwrap_or_default()
474    }
475}
476
477// ---------------------------------------------------------------------------
478// Tests
479// ---------------------------------------------------------------------------
480
#[cfg(test)]
mod tests {
    use super::*;

    /// Each error variant renders the details needed to identify the problem.
    #[test]
    fn copy_backend_error_display() {
        let not_found = CopyBackendError::NotFound {
            name: String::from("alice"),
        };
        assert_eq!(not_found.to_string(), "workspace 'alice' not found");

        let missing = CopyBackendError::MissingEpochFile {
            workspace: String::from("bob"),
        };
        let rendered = missing.to_string();
        assert!(rendered.contains("bob"));
        assert!(rendered.contains(EPOCH_FILE));

        let io = CopyBackendError::Io(std::io::Error::other("disk full"));
        assert!(io.to_string().contains("disk full"));
    }

    /// The constructor stores the root, and workspaces live under `<root>/ws`.
    #[test]
    fn copy_backend_new() {
        let root = PathBuf::from("/tmp/repo");
        let backend = CopyBackend::new(root.clone());
        assert_eq!(backend.root, root);
        assert_eq!(backend.workspaces_dir(), root.join("ws"));
    }
}
511}