Skip to main content

maw/merge/
collect.rs

1//! Collect step of the N-way merge pipeline.
2//!
3//! For each source workspace, calls `backend.snapshot()` to enumerate changed
4//! files, then reads their content. Produces one [`PatchSet`] per workspace.
5//!
6//! # Invariants
7//!
8//! - **Determinism**: `PatchSet` changes are sorted by path on construction.
9//! - **Completeness**: Every workspace in `workspace_ids` produces a `PatchSet`,
10//!   including empty workspaces. The caller decides how to handle empties.
11//! - **Isolation**: Each workspace is snapshotted independently. A failure in
12//!   one workspace returns `Err` immediately (fail-fast).
13//!
14//! # `FileId` and blob OID enrichment (Phase 3+)
15//!
//! Using the repository root passed as `repo_root`, `collect_snapshots`
//! enriches each [`FileChange`] with:
//!
//! - `file_id`: looked up from `.manifold/fileids` for Modified/Deleted files
//!   (files that existed in the epoch). Added files always receive a fresh
//!   random [`FileId`]. If the fileids file cannot be loaded, a default map is
//!   used instead, so Modified/Deleted files have no recorded `FileId` to find.
22//! - `blob`: the git blob OID for the new content, computed via
23//!   `git hash-object -w --stdin`. Enables O(1) hash-equality checks in the
24//!   resolve step.
25
26use std::fmt;
27use std::io::Write as IoWrite;
28use std::path::{Path, PathBuf};
29use std::process::{Command, Stdio};
30
31use crate::backend::WorkspaceBackend;
32use crate::model::file_id::FileIdMap;
33use crate::model::patch::FileId;
34use crate::model::types::{EpochId, GitOid, WorkspaceId};
35
36use super::types::{ChangeKind, FileChange, PatchSet};
37
38// ---------------------------------------------------------------------------
39// CollectError
40// ---------------------------------------------------------------------------
41
/// Errors that can occur during the collect step.
///
/// Every variant names the workspace that was being collected and carries the
/// underlying failure rendered to a `String` (the original error value is not
/// retained).
#[derive(Debug)]
// All variants intentionally share the `Failed` suffix, which trips
// clippy's `enum_variant_names` lint.
#[allow(clippy::enum_variant_names)]
pub enum CollectError {
    /// A workspace snapshot operation failed.
    ///
    /// Produced when `backend.snapshot()` returns an error.
    SnapshotFailed {
        /// The workspace that failed.
        workspace_id: WorkspaceId,
        /// Underlying error message.
        reason: String,
    },
    /// Reading a changed file's content failed.
    ///
    /// Produced when an added or modified file cannot be read from the
    /// workspace's working tree.
    ReadFailed {
        /// The workspace where the file lives.
        workspace_id: WorkspaceId,
        /// The file that could not be read (relative path).
        path: PathBuf,
        /// Underlying I/O error message.
        reason: String,
    },
    /// Querying the workspace's base epoch failed.
    ///
    /// Produced when `backend.status()` returns an error.
    EpochFailed {
        /// The workspace that failed.
        workspace_id: WorkspaceId,
        /// Underlying error message.
        reason: String,
    },
}
70
71impl fmt::Display for CollectError {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        match self {
74            Self::SnapshotFailed {
75                workspace_id,
76                reason,
77            } => {
78                write!(
79                    f,
80                    "snapshot failed for workspace '{workspace_id}': {reason}"
81                )
82            }
83            Self::ReadFailed {
84                workspace_id,
85                path,
86                reason,
87            } => {
88                write!(
89                    f,
90                    "failed to read '{}' in workspace '{}': {}",
91                    path.display(),
92                    workspace_id,
93                    reason
94                )
95            }
96            Self::EpochFailed {
97                workspace_id,
98                reason,
99            } => {
100                write!(
101                    f,
102                    "epoch query failed for workspace '{workspace_id}': {reason}"
103                )
104            }
105        }
106    }
107}
108
// Marker impl using the trait's default methods only: no `source()` override
// is provided, so the underlying failure is exposed solely through the
// `reason` text of each variant.
impl std::error::Error for CollectError {}
110
111// ---------------------------------------------------------------------------
112// collect_snapshots
113// ---------------------------------------------------------------------------
114
115/// Collect changed-file snapshots from a set of workspaces.
116///
117/// For each workspace in `workspace_ids`:
118/// 1. Calls `backend.snapshot()` to enumerate added, modified, and deleted paths.
119/// 2. Calls `backend.status()` to determine the workspace's base epoch.
120/// 3. Reads file content for added/modified files from the workspace directory.
121/// 4. Enriches each [`FileChange`] with a git blob OID (via `git hash-object`)
122///    and a stable [`FileId`] (from `.manifold/fileids` or freshly generated).
123///
124/// Returns one `PatchSet` per workspace in the same order as `workspace_ids`.
125/// Empty workspaces (no changes) produce an empty `PatchSet` — they are **not**
126/// filtered out, so the caller receives a complete picture.
127///
128/// # Arguments
129///
130/// * `repo_root` — Path to the git repository root, used to:
131///   - Write blobs via `git hash-object -w --stdin`.
132///   - Load the epoch `FileId` map from `<repo_root>/.manifold/fileids`.
133///
134/// # Errors
135///
136/// Returns [`CollectError`] on the first workspace that fails. Failures include:
137/// - Workspace not found (e.g., destroyed between listing and collect)
138/// - I/O errors reading file content
139/// - Backend errors querying status
140pub fn collect_snapshots<B: WorkspaceBackend>(
141    repo_root: &Path,
142    backend: &B,
143    workspace_ids: &[WorkspaceId],
144) -> Result<Vec<PatchSet>, CollectError> {
145    // Load the epoch FileId map once; shared across all workspaces.
146    // If the file doesn't exist yet (new repo), use an empty map.
147    let fileids_path = repo_root.join(".manifold").join("fileids");
148    let file_id_map = FileIdMap::load(&fileids_path).unwrap_or_default();
149
150    let mut patch_sets = Vec::with_capacity(workspace_ids.len());
151    for ws_id in workspace_ids {
152        let patch_set = collect_one(repo_root, &file_id_map, backend, ws_id)?;
153        patch_sets.push(patch_set);
154    }
155
156    Ok(patch_sets)
157}
158
159// ---------------------------------------------------------------------------
160// Internal helpers
161// ---------------------------------------------------------------------------
162
163/// Collect a single workspace's changes into a `PatchSet`.
164///
165/// Enriches each [`FileChange`] with:
166/// - `file_id`: from `file_id_map` (Modified/Deleted) or freshly generated (Added).
167/// - `blob`: computed via `git hash-object -w --stdin` for Added/Modified content.
168fn collect_one<B: WorkspaceBackend>(
169    repo_root: &Path,
170    file_id_map: &FileIdMap,
171    backend: &B,
172    ws_id: &WorkspaceId,
173) -> Result<PatchSet, CollectError> {
174    // Step 1: Enumerate changed paths.
175    let snapshot = backend
176        .snapshot(ws_id)
177        .map_err(|e| CollectError::SnapshotFailed {
178            workspace_id: ws_id.clone(),
179            reason: e.to_string(),
180        })?;
181
182    // Step 2: Determine the workspace's base epoch.
183    let status = backend
184        .status(ws_id)
185        .map_err(|e| CollectError::EpochFailed {
186            workspace_id: ws_id.clone(),
187            reason: e.to_string(),
188        })?;
189    let epoch = status.base_epoch;
190
191    // Step 3: Short-circuit for empty workspaces.
192    if snapshot.is_empty() {
193        return Ok(PatchSet::new(ws_id.clone(), epoch, vec![]));
194    }
195
196    // Step 4: Build FileChanges, reading content for non-deletions.
197    let ws_path = backend.workspace_path(ws_id);
198    let capacity = snapshot.change_count();
199    let mut changes = Vec::with_capacity(capacity);
200
201    // Added files: read content, generate fresh FileId, compute blob OID.
202    for path in &snapshot.added {
203        let content = read_workspace_file(&ws_path, path, ws_id)?;
204        let blob = git_hash_object(repo_root, &content);
205        // Assign a fresh FileId for new files. The FileIdMap for the epoch
206        // won't have an entry yet; the FileId is minted here and would be
207        // persisted by the workspace's oplog in a full implementation.
208        let file_id = Some(FileId::random());
209        changes.push(FileChange::with_identity(
210            path.clone(),
211            ChangeKind::Added,
212            Some(content),
213            file_id,
214            blob,
215        ));
216    }
217
218    // Modified files: read current content, look up existing FileId, compute blob OID.
219    for path in &snapshot.modified {
220        let content = read_workspace_file(&ws_path, path, ws_id)?;
221        let blob = git_hash_object(repo_root, &content);
222        // Modified files existed in the epoch, so their FileId is in the map.
223        let file_id = file_id_map.id_for_path(path);
224        changes.push(FileChange::with_identity(
225            path.clone(),
226            ChangeKind::Modified,
227            Some(content),
228            file_id,
229            blob,
230        ));
231    }
232
233    // Deleted files: no content; look up FileId from epoch map.
234    //
235    // Phantom-deletion filter: the snapshot diffs the working tree against the
236    // *current* epoch, but this workspace may be based on an older epoch.
237    // Files added by other workspaces (and already merged into the current
238    // epoch) show up as "Deleted" here because the worker never had them.
239    // These are phantom deletions — skip them so the merge engine doesn't
240    // remove files the worker never touched.
241    //
242    // IMPORTANT: We must check against the workspace's *creation* epoch, not
243    // its current HEAD. Agents may commit changes inside a workspace (e.g.
244    // `git rm foo && git commit`), advancing HEAD beyond the creation epoch.
245    // If a committed deletion moves HEAD to a commit where the file is absent,
246    // checking HEAD would incorrectly classify the deletion as phantom.
247    //
248    // `workspace_creation_epoch()` finds the original epoch by computing the
249    // merge-base of the workspace HEAD with the current epoch ref. This is
250    // the commit the workspace was initially created from.
251    let creation_epoch = workspace_creation_epoch(repo_root, &ws_path, &epoch);
252    for path in &snapshot.deleted {
253        if !path_exists_at_commit(repo_root, &creation_epoch, path) {
254            // File doesn't exist at the workspace's creation epoch — it was
255            // added after this workspace was created. Not a real deletion.
256            continue;
257        }
258        let file_id = file_id_map.id_for_path(path);
259        changes.push(FileChange::with_identity(
260            path.clone(),
261            ChangeKind::Deleted,
262            None,
263            file_id,
264            None, // no blob for deletions
265        ));
266    }
267
268    Ok(PatchSet::new(ws_id.clone(), epoch, changes))
269}
270
271/// Determine the epoch a workspace was originally created from.
272///
273/// Agents may commit changes inside a workspace, advancing HEAD beyond the
274/// epoch the workspace was created at. The `status().base_epoch` returns HEAD,
275/// which is wrong for the phantom-deletion filter when the agent has committed
276/// deletions.
277///
278/// This function computes `git merge-base HEAD <epoch>` inside the workspace
279/// to find the fork point — the original creation epoch. Falls back to `epoch`
280/// (the workspace HEAD from status) when:
281/// - The merge-base command fails (e.g. non-git backend)
282/// - HEAD equals the epoch (no agent commits)
283fn workspace_creation_epoch(repo_root: &Path, ws_path: &Path, ws_head: &EpochId) -> EpochId {
284    // Read the current epoch ref from the repo root.
285    let epoch_ref_output = Command::new("git")
286        .args(["rev-parse", "refs/manifold/epoch/current"])
287        .current_dir(repo_root)
288        .output();
289
290    let current_epoch_oid = match epoch_ref_output {
291        Ok(out) if out.status.success() => {
292            String::from_utf8_lossy(&out.stdout).trim().to_owned()
293        }
294        _ => return ws_head.clone(), // No epoch ref → fall back to ws HEAD
295    };
296
297    // If HEAD already equals the current epoch, no agent commits were made.
298    if ws_head.as_str() == current_epoch_oid {
299        return ws_head.clone();
300    }
301
302    // Compute merge-base(HEAD, current_epoch) inside the workspace.
303    let mb_output = Command::new("git")
304        .args(["merge-base", "HEAD", &current_epoch_oid])
305        .current_dir(ws_path)
306        .output();
307
308    match mb_output {
309        Ok(out) if out.status.success() => {
310            let oid_str = String::from_utf8_lossy(&out.stdout).trim().to_owned();
311            EpochId::new(&oid_str).unwrap_or_else(|_| ws_head.clone())
312        }
313        _ => ws_head.clone(), // merge-base failed → fall back to ws HEAD
314    }
315}
316
317/// Check whether a file path exists in a given git commit's tree.
318///
319/// Uses `git cat-file -e <commit>:<path>` which exits 0 if the object exists
320/// and non-zero otherwise. This is fast (no data transfer) and works for any
321/// tree-ish.
322fn path_exists_at_commit(repo_root: &Path, commit: &EpochId, path: &Path) -> bool {
323    let rev = format!("{}:{}", commit.as_str(), path.display());
324    Command::new("git")
325        .args(["cat-file", "-e", &rev])
326        .current_dir(repo_root)
327        .stdout(Stdio::null())
328        .stderr(Stdio::null())
329        .status()
330        .is_ok_and(|s| s.success())
331}
332
333/// Read the current content of a file from a workspace's working tree.
334fn read_workspace_file(
335    ws_path: &Path,
336    rel_path: &Path,
337    ws_id: &WorkspaceId,
338) -> Result<Vec<u8>, CollectError> {
339    let full_path = ws_path.join(rel_path);
340    std::fs::read(&full_path).map_err(|e| CollectError::ReadFailed {
341        workspace_id: ws_id.clone(),
342        path: rel_path.to_path_buf(),
343        reason: e.to_string(),
344    })
345}
346
347/// Write `content` to the git object store and return its blob OID.
348///
349/// Runs `git hash-object -w --stdin` in `repo_root`. Returns `None` on any
350/// failure (git unavailable, I/O error, invalid OID output) — callers treat
351/// a missing blob OID as a degraded-mode fallback, not a hard error.
352fn git_hash_object(repo_root: &Path, content: &[u8]) -> Option<GitOid> {
353    let mut child = Command::new("git")
354        .args(["hash-object", "-w", "--stdin"])
355        .current_dir(repo_root)
356        .stdin(Stdio::piped())
357        .stdout(Stdio::piped())
358        .stderr(Stdio::null())
359        .spawn()
360        .ok()?;
361
362    // Write content to stdin; ignore broken-pipe errors.
363    if let Some(stdin) = child.stdin.take() {
364        let mut stdin = stdin;
365        let _ = stdin.write_all(content);
366    }
367
368    let output = child.wait_with_output().ok()?;
369    if !output.status.success() {
370        return None;
371    }
372
373    let hex = String::from_utf8(output.stdout).ok()?;
374    GitOid::new(hex.trim()).ok()
375}
376
377// ---------------------------------------------------------------------------
378// Tests
379// ---------------------------------------------------------------------------
380
381#[cfg(test)]
382#[allow(clippy::all, clippy::pedantic, clippy::nursery)]
383mod tests {
384    use super::*;
385    use crate::backend::WorkspaceBackend;
386    use crate::backend::git::GitWorktreeBackend;
387    use crate::model::types::{EpochId, WorkspaceId};
388    use std::fs;
389    use std::process::Command;
390    use tempfile::TempDir;
391
392    // -----------------------------------------------------------------------
393    // Test helpers
394    // -----------------------------------------------------------------------
395
396    /// Set up a fresh git repo with one initial commit.
397    ///
398    /// Returns `(TempDir, EpochId)` where `EpochId` is the initial commit OID.
399    /// The `TempDir` must outlive the `GitWorktreeBackend` that uses it.
400    fn setup_git_repo() -> (TempDir, EpochId) {
401        let temp_dir = TempDir::new().unwrap();
402        let root = temp_dir.path();
403
404        Command::new("git")
405            .args(["init"])
406            .current_dir(root)
407            .output()
408            .unwrap();
409
410        for (key, val) in [
411            ("user.name", "Test User"),
412            ("user.email", "test@example.com"),
413            ("commit.gpgsign", "false"),
414        ] {
415            Command::new("git")
416                .args(["config", key, val])
417                .current_dir(root)
418                .output()
419                .unwrap();
420        }
421
422        // Write an initial file so the repo has at least one tracked file.
423        fs::write(root.join("README.md"), "# Test Repo").unwrap();
424        Command::new("git")
425            .args(["add", "README.md"])
426            .current_dir(root)
427            .output()
428            .unwrap();
429        Command::new("git")
430            .args(["commit", "-m", "Initial commit"])
431            .current_dir(root)
432            .output()
433            .unwrap();
434
435        let oid_str = git_head_oid(root);
436        let epoch = EpochId::new(&oid_str).unwrap();
437        (temp_dir, epoch)
438    }
439
440    /// Return the current HEAD OID of a git repo.
441    fn git_head_oid(root: &std::path::Path) -> String {
442        let out = Command::new("git")
443            .args(["rev-parse", "HEAD"])
444            .current_dir(root)
445            .output()
446            .unwrap();
447        String::from_utf8(out.stdout).unwrap().trim().to_owned()
448    }
449
450    // -----------------------------------------------------------------------
451    // Error display
452    // -----------------------------------------------------------------------
453
454    #[test]
455    fn collect_error_display_snapshot_failed() {
456        let ws_id = WorkspaceId::new("alpha").unwrap();
457        let err = CollectError::SnapshotFailed {
458            workspace_id: ws_id,
459            reason: "disk full".to_owned(),
460        };
461        let msg = format!("{err}");
462        assert!(msg.contains("alpha"), "missing workspace name: {msg}");
463        assert!(msg.contains("disk full"), "missing reason: {msg}");
464    }
465
466    #[test]
467    fn collect_error_display_read_failed() {
468        let ws_id = WorkspaceId::new("beta").unwrap();
469        let err = CollectError::ReadFailed {
470            workspace_id: ws_id,
471            path: PathBuf::from("src/lib.rs"),
472            reason: "permission denied".to_owned(),
473        };
474        let msg = format!("{err}");
475        assert!(msg.contains("beta"), "missing workspace name: {msg}");
476        assert!(msg.contains("src/lib.rs"), "missing path: {msg}");
477        assert!(msg.contains("permission denied"), "missing reason: {msg}");
478    }
479
480    #[test]
481    fn collect_error_display_epoch_failed() {
482        let ws_id = WorkspaceId::new("gamma").unwrap();
483        let err = CollectError::EpochFailed {
484            workspace_id: ws_id,
485            reason: "not a git repo".to_owned(),
486        };
487        let msg = format!("{err}");
488        assert!(msg.contains("gamma"), "missing workspace name: {msg}");
489        assert!(msg.contains("not a git repo"), "missing reason: {msg}");
490    }
491
492    // -----------------------------------------------------------------------
493    // Empty workspaces
494    // -----------------------------------------------------------------------
495
496    #[test]
497    fn collect_empty_workspace_produces_empty_patch_set() {
498        let (temp_dir, epoch) = setup_git_repo();
499        let backend = GitWorktreeBackend::new(temp_dir.path().to_path_buf());
500        let ws_id = WorkspaceId::new("empty-ws").unwrap();
501        backend.create(&ws_id, &epoch).unwrap();
502
503        let results = collect_snapshots(temp_dir.path(), &backend, &[ws_id.clone()]).unwrap();
504
505        assert_eq!(results.len(), 1, "should have one PatchSet");
506        let ps = &results[0];
507        assert_eq!(ps.workspace_id, ws_id);
508        assert!(ps.is_empty(), "no changes expected: {:?}", ps.changes);
509        assert_eq!(ps.epoch, epoch);
510    }
511
512    // -----------------------------------------------------------------------
513    // Added files
514    // -----------------------------------------------------------------------
515
516    #[test]
517    fn collect_added_file() {
518        let (temp_dir, epoch) = setup_git_repo();
519        let backend = GitWorktreeBackend::new(temp_dir.path().to_path_buf());
520        let ws_id = WorkspaceId::new("add-ws").unwrap();
521        let info = backend.create(&ws_id, &epoch).unwrap();
522
523        fs::write(info.path.join("new.rs"), "fn main() {}").unwrap();
524
525        let results = collect_snapshots(temp_dir.path(), &backend, &[ws_id]).unwrap();
526        let ps = &results[0];
527
528        assert_eq!(ps.change_count(), 1);
529        let change = &ps.changes[0];
530        assert_eq!(change.path, PathBuf::from("new.rs"));
531        assert!(matches!(change.kind, ChangeKind::Added));
532        assert_eq!(change.content.as_deref(), Some(b"fn main() {}".as_ref()));
533    }
534
535    // -----------------------------------------------------------------------
536    // Modified files
537    // -----------------------------------------------------------------------
538
539    #[test]
540    fn collect_modified_file() {
541        let (temp_dir, epoch) = setup_git_repo();
542        let backend = GitWorktreeBackend::new(temp_dir.path().to_path_buf());
543        let ws_id = WorkspaceId::new("mod-ws").unwrap();
544        let info = backend.create(&ws_id, &epoch).unwrap();
545
546        fs::write(info.path.join("README.md"), "# Modified").unwrap();
547
548        let results = collect_snapshots(temp_dir.path(), &backend, &[ws_id]).unwrap();
549        let ps = &results[0];
550
551        assert_eq!(ps.change_count(), 1);
552        let change = &ps.changes[0];
553        assert_eq!(change.path, PathBuf::from("README.md"));
554        assert!(matches!(change.kind, ChangeKind::Modified));
555        assert_eq!(change.content.as_deref(), Some(b"# Modified".as_ref()));
556    }
557
558    // -----------------------------------------------------------------------
559    // Deleted files
560    // -----------------------------------------------------------------------
561
562    #[test]
563    fn collect_deleted_file() {
564        let (temp_dir, epoch) = setup_git_repo();
565        let backend = GitWorktreeBackend::new(temp_dir.path().to_path_buf());
566        let ws_id = WorkspaceId::new("del-ws").unwrap();
567        let info = backend.create(&ws_id, &epoch).unwrap();
568
569        fs::remove_file(info.path.join("README.md")).unwrap();
570
571        let results = collect_snapshots(temp_dir.path(), &backend, &[ws_id]).unwrap();
572        let ps = &results[0];
573
574        assert_eq!(ps.change_count(), 1);
575        let change = &ps.changes[0];
576        assert_eq!(change.path, PathBuf::from("README.md"));
577        assert!(matches!(change.kind, ChangeKind::Deleted));
578        assert!(change.content.is_none(), "deletions have no content");
579    }
580
581    /// Committed deletion: agent does `git rm` + `git commit` inside the workspace.
582    ///
583    /// This is a regression test for bn-129d: the merge engine silently dropped
584    /// file deletions that were committed (not just staged or working-tree
585    /// changes). The phantom-deletion filter was using HEAD (which had the
586    /// deletion committed) instead of the creation epoch (where the file still
587    /// existed), causing it to incorrectly classify real deletions as phantom.
588    #[test]
589    fn collect_committed_deletion() {
590        let (temp_dir, epoch) = setup_git_repo();
591        let root = temp_dir.path();
592        let backend = GitWorktreeBackend::new(root.to_path_buf());
593
594        // Set refs/manifold/epoch/current so snapshot() diffs against the epoch.
595        Command::new("git")
596            .args(["update-ref", "refs/manifold/epoch/current", epoch.as_str()])
597            .current_dir(root)
598            .output()
599            .unwrap();
600
601        let ws_id = WorkspaceId::new("committed-del").unwrap();
602        let info = backend.create(&ws_id, &epoch).unwrap();
603
604        // Agent commits a deletion inside the workspace (git rm + git commit).
605        Command::new("git")
606            .args(["rm", "README.md"])
607            .current_dir(&info.path)
608            .output()
609            .unwrap();
610        Command::new("git")
611            .args(["commit", "-m", "delete README.md"])
612            .current_dir(&info.path)
613            .output()
614            .unwrap();
615
616        // Verify HEAD has advanced beyond the epoch.
617        let ws_head = git_head_oid(&info.path);
618        assert_ne!(
619            ws_head,
620            epoch.as_str(),
621            "workspace HEAD should have advanced after commit"
622        );
623
624        let results = collect_snapshots(root, &backend, &[ws_id]).unwrap();
625        let ps = &results[0];
626
627        assert_eq!(
628            ps.change_count(),
629            1,
630            "committed deletion should be captured, not silently dropped: {:?}",
631            ps.changes
632        );
633        let change = &ps.changes[0];
634        assert_eq!(change.path, PathBuf::from("README.md"));
635        assert!(
636            matches!(change.kind, ChangeKind::Deleted),
637            "change should be Deleted, got {:?}",
638            change.kind
639        );
640        assert!(change.content.is_none(), "deletions have no content");
641    }
642
643    /// Deletion-only workspace: `PatchSet` reports all deletions, none are filtered.
644    #[test]
645    fn collect_deletion_only_workspace() {
646        let (temp_dir, _epoch) = setup_git_repo();
647        let root = temp_dir.path();
648        let backend = GitWorktreeBackend::new(root.to_path_buf());
649
650        // Add a second tracked file so we can delete both later.
651        fs::write(root.join("lib.rs"), "pub fn lib() {}").unwrap();
652        Command::new("git")
653            .args(["add", "lib.rs"])
654            .current_dir(root)
655            .output()
656            .unwrap();
657        Command::new("git")
658            .args(["commit", "-m", "Add lib.rs"])
659            .current_dir(root)
660            .output()
661            .unwrap();
662        let epoch2 = EpochId::new(&git_head_oid(root)).unwrap();
663
664        let ws_id = WorkspaceId::new("del-only").unwrap();
665        let info = backend.create(&ws_id, &epoch2).unwrap();
666
667        // Delete both tracked files.
668        fs::remove_file(info.path.join("README.md")).unwrap();
669        fs::remove_file(info.path.join("lib.rs")).unwrap();
670
671        let results = collect_snapshots(temp_dir.path(), &backend, &[ws_id]).unwrap();
672        let ps = &results[0];
673
674        assert!(
675            ps.is_deletion_only(),
676            "expected deletion-only: {:?}",
677            ps.changes
678        );
679        assert_eq!(ps.deleted_count(), 2);
680        assert_eq!(ps.added_count(), 0);
681        assert_eq!(ps.modified_count(), 0);
682        for change in &ps.changes {
683            assert!(change.content.is_none(), "deletions should have no content");
684        }
685    }
686
687    // -----------------------------------------------------------------------
688    // Multiple workspaces with various change patterns
689    // -----------------------------------------------------------------------
690
691    /// Collect from 3 workspaces with disjoint, mixed, and empty changes.
692    #[test]
693    fn collect_three_workspaces_various_patterns() {
694        let (temp_dir, epoch) = setup_git_repo();
695        let backend = GitWorktreeBackend::new(temp_dir.path().to_path_buf());
696
697        // Workspace A: adds a new file.
698        let ws_a = WorkspaceId::new("ws-a").unwrap();
699        let info_a = backend.create(&ws_a, &epoch).unwrap();
700        fs::write(info_a.path.join("feature_a.rs"), "pub fn a() {}").unwrap();
701
702        // Workspace B: modifies README and adds a file.
703        let ws_b = WorkspaceId::new("ws-b").unwrap();
704        let info_b = backend.create(&ws_b, &epoch).unwrap();
705        fs::write(info_b.path.join("README.md"), "# Updated by B").unwrap();
706        fs::write(info_b.path.join("feature_b.rs"), "pub fn b() {}").unwrap();
707
708        // Workspace C: no changes.
709        let ws_c = WorkspaceId::new("ws-c").unwrap();
710        backend.create(&ws_c, &epoch).unwrap();
711
712        let ids = vec![ws_a.clone(), ws_b.clone(), ws_c.clone()];
713        let results = collect_snapshots(temp_dir.path(), &backend, &ids).unwrap();
714
715        assert_eq!(results.len(), 3, "should have one PatchSet per workspace");
716
717        let ps_a = &results[0];
718        let ps_b = &results[1];
719        let ps_c = &results[2];
720
721        // Workspace A: 1 added
722        assert_eq!(ps_a.workspace_id, ws_a);
723        assert_eq!(ps_a.change_count(), 1);
724        assert!(matches!(ps_a.changes[0].kind, ChangeKind::Added));
725        assert_eq!(ps_a.changes[0].path, PathBuf::from("feature_a.rs"));
726
727        // Workspace B: 1 modified + 1 added = 2 total, sorted by path
728        assert_eq!(ps_b.workspace_id, ws_b);
729        assert_eq!(ps_b.change_count(), 2);
730        // Sorted: README.md < feature_b.rs
731        assert_eq!(ps_b.changes[0].path, PathBuf::from("README.md"));
732        assert!(matches!(ps_b.changes[0].kind, ChangeKind::Modified));
733        assert_eq!(ps_b.changes[1].path, PathBuf::from("feature_b.rs"));
734        assert!(matches!(ps_b.changes[1].kind, ChangeKind::Added));
735
736        // Workspace C: empty
737        assert_eq!(ps_c.workspace_id, ws_c);
738        assert!(ps_c.is_empty());
739    }
740
741    // -----------------------------------------------------------------------
742    // Ordering: PatchSets match workspace_ids order
743    // -----------------------------------------------------------------------
744
745    #[test]
746    fn collect_preserves_workspace_order() {
747        let (temp_dir, epoch) = setup_git_repo();
748        let backend = GitWorktreeBackend::new(temp_dir.path().to_path_buf());
749
750        let names = ["zulu", "alpha", "mike"];
751        let ids: Vec<WorkspaceId> = names.iter().map(|n| WorkspaceId::new(n).unwrap()).collect();
752
753        for ws_id in &ids {
754            backend.create(ws_id, &epoch).unwrap();
755        }
756
757        let results = collect_snapshots(temp_dir.path(), &backend, &ids).unwrap();
758
759        assert_eq!(results.len(), 3);
760        for (i, ws_id) in ids.iter().enumerate() {
761            assert_eq!(
762                &results[i].workspace_id, ws_id,
763                "PatchSet[{i}] should match input order"
764            );
765        }
766    }
767
768    // -----------------------------------------------------------------------
769    // Content correctness
770    // -----------------------------------------------------------------------
771
/// The bytes recorded in a change's `content` must equal the bytes that
/// were written to the file in the workspace.
#[test]
fn collect_content_matches_file() {
    let (repo, base_epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());
    let ws_id = WorkspaceId::new("content-ws").unwrap();
    let workspace = backend.create(&ws_id, &base_epoch).unwrap();

    let written: &[u8] = b"hello world\n";
    let target = workspace.path.join("hello.txt");
    fs::write(target, written).unwrap();

    let patch_sets = collect_snapshots(repo.path(), &backend, &[ws_id]).unwrap();
    let recorded = patch_sets[0].changes[0].content.as_deref();

    assert_eq!(
        recorded,
        Some(written),
        "content should match what was written"
    );
}
791
792    // -----------------------------------------------------------------------
793    // Error: nonexistent workspace
794    // -----------------------------------------------------------------------
795
/// Collecting from a workspace id that was never created must fail with
/// `CollectError::SnapshotFailed` naming that workspace.
#[test]
fn collect_nonexistent_workspace_returns_error() {
    let (repo, _epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());
    // No `backend.create` call: this id has no workspace behind it.
    let missing = WorkspaceId::new("no-such").unwrap();

    let failure = collect_snapshots(repo.path(), &backend, &[missing]).unwrap_err();
    if let CollectError::SnapshotFailed { workspace_id, .. } = &failure {
        assert_eq!(workspace_id.as_str(), "no-such");
    } else {
        panic!("expected SnapshotFailed, got {failure}");
    }
}
810
811    // -----------------------------------------------------------------------
812    // Phase 3: FileId + blob OID enrichment
813    // -----------------------------------------------------------------------
814
/// A file newly created in a workspace is reported as `Added` and carries
/// a `FileId` (i.e. `file_id` is not `None`).
#[test]
fn collect_added_file_has_file_id() {
    let (repo, base_epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());
    let ws_id = WorkspaceId::new("fileid-add").unwrap();
    let workspace = backend.create(&ws_id, &base_epoch).unwrap();

    let new_file = workspace.path.join("brand_new.rs");
    fs::write(new_file, "pub fn new() {}").unwrap();

    let patch_sets = collect_snapshots(repo.path(), &backend, &[ws_id]).unwrap();
    let added = &patch_sets[0].changes[0];

    let has_id = added.file_id.is_some();
    assert!(has_id, "added file should receive a fresh FileId");

    let is_added = matches!(added.kind, ChangeKind::Added);
    assert!(is_added, "kind should be Added");
}
837
/// A file newly created in a workspace must come back with a blob OID
/// (the OID is computed via git hash-object).
#[test]
fn collect_added_file_has_blob_oid() {
    let (repo, base_epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());
    let ws_id = WorkspaceId::new("blob-add").unwrap();
    let workspace = backend.create(&ws_id, &base_epoch).unwrap();

    let new_file = workspace.path.join("blob_test.rs");
    fs::write(new_file, "pub fn blob() {}").unwrap();

    let patch_sets = collect_snapshots(repo.path(), &backend, &[ws_id]).unwrap();
    let blob = &patch_sets[0].changes[0].blob;

    assert!(
        blob.is_some(),
        "added file should have a blob OID from git hash-object"
    );
}
856
/// Overwriting an existing file is reported as `Modified`, and the change
/// carries a blob OID for the new content.
#[test]
fn collect_modified_file_has_blob_oid() {
    let (repo, base_epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());
    let ws_id = WorkspaceId::new("blob-mod").unwrap();
    let workspace = backend.create(&ws_id, &base_epoch).unwrap();

    // README.md is expected to already exist in the fixture repo, so
    // rewriting it should register as a modification rather than an add.
    let readme = workspace.path.join("README.md");
    fs::write(readme, "# Modified content").unwrap();

    let patch_sets = collect_snapshots(repo.path(), &backend, &[ws_id]).unwrap();
    let modified = &patch_sets[0].changes[0];

    let is_modified = matches!(modified.kind, ChangeKind::Modified);
    assert!(is_modified, "kind should be Modified");

    let has_blob = modified.blob.is_some();
    assert!(has_blob, "modified file should have a blob OID");
}
879
/// Removing a file is reported as `Deleted`, and the change must NOT carry
/// a blob OID, since a deletion has no new content.
#[test]
fn collect_deleted_file_has_no_blob_oid() {
    let (repo, base_epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());
    let ws_id = WorkspaceId::new("blob-del").unwrap();
    let workspace = backend.create(&ws_id, &base_epoch).unwrap();

    // The unwrap doubles as a check that README.md was present to delete.
    let readme = workspace.path.join("README.md");
    fs::remove_file(readme).unwrap();

    let patch_sets = collect_snapshots(repo.path(), &backend, &[ws_id]).unwrap();
    let deleted = &patch_sets[0].changes[0];

    let is_deleted = matches!(deleted.kind, ChangeKind::Deleted);
    assert!(is_deleted, "kind should be Deleted");

    let lacks_blob = deleted.blob.is_none();
    assert!(lacks_blob, "deleted file should have no blob OID");
}
902
/// When two separate workspaces each add a file with byte-identical
/// content, both changes must report the same blob OID, since the OID is
/// derived from content rather than from the workspace.
#[test]
fn collect_same_content_produces_same_blob_oid() {
    let (repo, base_epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());

    let content = b"pub fn shared() {}\n";

    // Creates a workspace with the given name and writes the shared content
    // into `shared.rs` inside it; returns the workspace id.
    let make_workspace = |name: &str| -> WorkspaceId {
        let id = WorkspaceId::new(name).unwrap();
        let workspace = backend.create(&id, &base_epoch).unwrap();
        fs::write(workspace.path.join("shared.rs"), content).unwrap();
        id
    };

    let ws_a = make_workspace("same-blob-a");
    let ws_b = make_workspace("same-blob-b");

    // Each workspace is collected in its own call.
    let results_a = collect_snapshots(repo.path(), &backend, &[ws_a]).unwrap();
    let results_b = collect_snapshots(repo.path(), &backend, &[ws_b]).unwrap();

    let oid_a = &results_a[0].changes[0].blob;
    let oid_b = &results_b[0].changes[0].blob;

    assert!(oid_a.is_some(), "ws_a should have a blob OID");
    assert!(oid_b.is_some(), "ws_b should have a blob OID");
    assert_eq!(
        oid_a, oid_b,
        "same content should produce the same blob OID (content-addressable)"
    );
}
933
/// A modified file must report the `FileId` recorded for its path in
/// `.manifold/fileids`, when that file is present under the repo root.
#[test]
fn collect_modified_file_uses_file_id_from_map() {
    use crate::model::patch::FileId;

    let (repo, base_epoch) = setup_git_repo();
    let backend = GitWorktreeBackend::new(repo.path().to_path_buf());

    // Write .manifold/fileids by hand as JSON so that README.md maps to an
    // id whose value this test controls.
    let known_id = FileId::new(0xdead_beef_cafe_babe_1234_5678_9abc_def0);
    let manifold_dir = repo.path().join(".manifold");
    fs::create_dir_all(&manifold_dir).unwrap();
    let json = format!(r#"[{{"path":"README.md","file_id":"{known_id}"}}]"#);
    fs::write(manifold_dir.join("fileids"), &json).unwrap();

    // Modify README.md inside a fresh workspace.
    let ws_id = WorkspaceId::new("fileid-mod").unwrap();
    let workspace = backend.create(&ws_id, &base_epoch).unwrap();
    let readme = workspace.path.join("README.md");
    fs::write(readme, "# Updated").unwrap();

    let patch_sets = collect_snapshots(repo.path(), &backend, &[ws_id]).unwrap();
    let modified = &patch_sets[0].changes[0];

    assert_eq!(
        modified.file_id,
        Some(known_id),
        "modified file should inherit FileId from epoch FileIdMap"
    );
}
965}