Skip to main content

maw/merge/
partition.rs

1//! PARTITION step of the N-way merge pipeline.
2//!
3//! Given a list of [`PatchSet`]s (one per workspace), builds an inverted index
4//! from path → list of workspace changes. Then partitions paths into:
5//!
6//! - **Unique paths**: touched by exactly 1 workspace → can be applied directly.
7//! - **Shared paths**: touched by 2+ workspaces → need conflict resolution.
8//!
9//! Paths are always sorted lexicographically for determinism.
10//!
11//! # Example
12//!
13//! ```text
14//! Workspace A: adds foo.rs, modifies bar.rs
15//! Workspace B: modifies bar.rs, deletes baz.rs
16//!
17//! Inverted index:
18//!   foo.rs → [(A, Added)]
19//!   bar.rs → [(A, Modified), (B, Modified)]
20//!   baz.rs → [(B, Deleted)]
21//!
22//! Partition:
23//!   unique: [baz.rs → (B, Deleted), foo.rs → (A, Added)]
24//!   shared: [bar.rs → [(A, Modified), (B, Modified)]]
25//! ```
26
27use std::collections::BTreeMap;
28use std::path::PathBuf;
29
30use crate::model::patch::FileId;
31use crate::model::types::{GitOid, WorkspaceId};
32
33use super::types::{ChangeKind, PatchSet};
34
35// ---------------------------------------------------------------------------
36// PathEntry
37// ---------------------------------------------------------------------------
38
39/// A single workspace's change to a particular file path.
40///
41/// Stored as entries in the inverted index. For non-deletions, `content`
42/// holds the new file content. For deletions, `content` is `None`.
43///
44/// `file_id` carries the stable [`FileId`] from the collect step (§5.8).
45/// When populated, the resolve step can group renames correctly — two entries
46/// with the same `FileId` but different paths represent a rename + content
47/// change rather than an independent add/delete pair.
48///
49/// `blob` is the git blob OID for the new content. The resolve step prefers
50/// OID equality (`blob == blob`) over byte-level content comparison.
51#[derive(Clone, Debug, PartialEq, Eq)]
52pub struct PathEntry {
53    /// The workspace that made this change.
54    pub workspace_id: WorkspaceId,
55    /// What kind of change was made.
56    pub kind: ChangeKind,
57    /// New file content (`None` for deletions).
58    pub content: Option<Vec<u8>>,
59    /// Stable file identity (§5.8). `None` for legacy/test paths without tracking.
60    pub file_id: Option<FileId>,
61    /// Git blob OID for the new content (Add/Modify only; `None` for Delete
62    /// and paths collected without git access).
63    pub blob: Option<GitOid>,
64}
65
66impl PathEntry {
67    /// Create a `PathEntry` without identity metadata (Phase 1 compat).
68    #[must_use]
69    pub const fn new(
70        workspace_id: WorkspaceId,
71        kind: ChangeKind,
72        content: Option<Vec<u8>>,
73    ) -> Self {
74        Self {
75            workspace_id,
76            kind,
77            content,
78            file_id: None,
79            blob: None,
80        }
81    }
82
83    /// Create a `PathEntry` with full identity metadata (Phase 3+).
84    #[must_use]
85    pub const fn with_identity(
86        workspace_id: WorkspaceId,
87        kind: ChangeKind,
88        content: Option<Vec<u8>>,
89        file_id: Option<FileId>,
90        blob: Option<GitOid>,
91    ) -> Self {
92        Self {
93            workspace_id,
94            kind,
95            content,
96            file_id,
97            blob,
98        }
99    }
100
101    /// Returns `true` if this entry is a deletion.
102    #[must_use]
103    pub const fn is_deletion(&self) -> bool {
104        matches!(self.kind, ChangeKind::Deleted)
105    }
106}
107
108// ---------------------------------------------------------------------------
109// PartitionResult
110// ---------------------------------------------------------------------------
111
112/// The result of partitioning patch-sets by path.
113///
114/// Paths are sorted lexicographically in both `unique` and `shared` for
115/// determinism.
116#[derive(Clone, Debug)]
117pub struct PartitionResult {
118    /// Paths touched by exactly 1 workspace. These can be applied directly
119    /// without conflict resolution.
120    ///
121    /// Each entry maps a path to the single workspace change.
122    pub unique: Vec<(PathBuf, PathEntry)>,
123
124    /// Paths touched by 2+ workspaces. These need conflict resolution
125    /// (hash equality check, diff3 merge, or conflict reporting).
126    ///
127    /// Each entry maps a path to all workspace changes for that path.
128    /// The inner `Vec` is sorted by workspace ID for determinism.
129    pub shared: Vec<(PathBuf, Vec<PathEntry>)>,
130}
131
132impl PartitionResult {
133    /// Total count of unique paths.
134    #[must_use]
135    pub const fn unique_count(&self) -> usize {
136        self.unique.len()
137    }
138
139    /// Total count of shared (potentially conflicting) paths.
140    #[must_use]
141    pub const fn shared_count(&self) -> usize {
142        self.shared.len()
143    }
144
145    /// Total count of all paths across unique and shared.
146    #[must_use]
147    pub const fn total_path_count(&self) -> usize {
148        self.unique.len() + self.shared.len()
149    }
150
151    /// Returns `true` if there are no shared paths (no conflicts possible).
152    #[must_use]
153    pub const fn is_conflict_free(&self) -> bool {
154        self.shared.is_empty()
155    }
156}
157
158// ---------------------------------------------------------------------------
159// partition_by_path
160// ---------------------------------------------------------------------------
161
162/// Partition a set of workspace patch-sets into unique and shared paths.
163///
164/// Builds an inverted index from path → workspace changes, then splits
165/// paths into those touched by exactly 1 workspace (unique) and those
166/// touched by 2+ workspaces (shared).
167///
168/// # Determinism
169///
170/// - Paths are processed in lexicographic order (via [`BTreeMap`]).
171/// - Within shared paths, entries are sorted by workspace ID.
172/// - Empty patch-sets are silently ignored (they contribute no paths).
173///
174/// # Arguments
175///
176/// * `patch_sets` — One `PatchSet` per workspace (from the collect step).
177///
178/// # Returns
179///
180/// A [`PartitionResult`] with unique and shared paths.
181#[must_use]
182pub fn partition_by_path(patch_sets: &[PatchSet]) -> PartitionResult {
183    // Build inverted index using BTreeMap for lexicographic ordering.
184    let mut index: BTreeMap<PathBuf, Vec<PathEntry>> = BTreeMap::new();
185
186    for ps in patch_sets {
187        for change in &ps.changes {
188            // Propagate FileId and blob OID from FileChange so that the
189            // resolve step can use OID equality and FileId-based rename
190            // tracking (§5.8).
191            let entry = PathEntry::with_identity(
192                ps.workspace_id.clone(),
193                change.kind.clone(),
194                change.content.clone(),
195                change.file_id,
196                change.blob.clone(),
197            );
198            index.entry(change.path.clone()).or_default().push(entry);
199        }
200    }
201
202    // Partition into unique and shared.
203    let mut unique = Vec::new();
204    let mut shared = Vec::new();
205
206    for (path, mut entries) in index {
207        if entries.len() == 1 {
208            // Unique: exactly 1 workspace touched this path.
209            unique.push((path, entries.remove(0)));
210        } else {
211            // Shared: 2+ workspaces touched this path.
212            // Sort by workspace ID for determinism.
213            entries.sort_by(|a, b| a.workspace_id.as_str().cmp(b.workspace_id.as_str()));
214            shared.push((path, entries));
215        }
216    }
217
218    // Paths are already sorted (BTreeMap iterates in order).
219
220    PartitionResult { unique, shared }
221}
222
223// ---------------------------------------------------------------------------
224// Tests
225// ---------------------------------------------------------------------------
226
227#[cfg(test)]
228#[allow(clippy::all, clippy::pedantic, clippy::nursery)]
229mod tests {
230    use super::*;
231    use crate::merge::types::{ChangeKind, FileChange, PatchSet};
232    use crate::model::types::{EpochId, WorkspaceId};
233
234    fn make_epoch() -> EpochId {
235        EpochId::new(&"a".repeat(40)).unwrap()
236    }
237
238    fn make_ws(name: &str) -> WorkspaceId {
239        WorkspaceId::new(name).unwrap()
240    }
241
242    fn make_change(path: &str, kind: ChangeKind, content: Option<&[u8]>) -> FileChange {
243        FileChange::new(PathBuf::from(path), kind, content.map(<[u8]>::to_vec))
244    }
245
246    // -- Empty inputs --
247
248    #[test]
249    fn partition_empty_patch_sets() {
250        let result = partition_by_path(&[]);
251        assert_eq!(result.unique_count(), 0);
252        assert_eq!(result.shared_count(), 0);
253        assert_eq!(result.total_path_count(), 0);
254        assert!(result.is_conflict_free());
255    }
256
257    #[test]
258    fn partition_single_empty_workspace() {
259        let ps = PatchSet::new(make_ws("ws-a"), make_epoch(), vec![]);
260        let result = partition_by_path(&[ps]);
261        assert_eq!(result.total_path_count(), 0);
262        assert!(result.is_conflict_free());
263    }
264
265    // -- All unique (disjoint changes) --
266
267    #[test]
268    fn partition_disjoint_changes_all_unique() {
269        let ps_a = PatchSet::new(
270            make_ws("ws-a"),
271            make_epoch(),
272            vec![make_change("a.rs", ChangeKind::Added, Some(b"fn a() {}"))],
273        );
274        let ps_b = PatchSet::new(
275            make_ws("ws-b"),
276            make_epoch(),
277            vec![make_change("b.rs", ChangeKind::Added, Some(b"fn b() {}"))],
278        );
279
280        let result = partition_by_path(&[ps_a, ps_b]);
281
282        assert_eq!(result.unique_count(), 2);
283        assert_eq!(result.shared_count(), 0);
284        assert!(result.is_conflict_free());
285
286        // Check paths are sorted lexicographically.
287        let unique_paths: Vec<_> = result.unique.iter().map(|(p, _)| p.clone()).collect();
288        assert_eq!(
289            unique_paths,
290            vec![PathBuf::from("a.rs"), PathBuf::from("b.rs")]
291        );
292
293        // Check workspace IDs.
294        assert_eq!(result.unique[0].1.workspace_id.as_str(), "ws-a");
295        assert_eq!(result.unique[1].1.workspace_id.as_str(), "ws-b");
296    }
297
298    // -- All shared (same file modified by both) --
299
300    #[test]
301    fn partition_shared_path() {
302        let ps_a = PatchSet::new(
303            make_ws("ws-a"),
304            make_epoch(),
305            vec![make_change("shared.rs", ChangeKind::Modified, Some(b"a"))],
306        );
307        let ps_b = PatchSet::new(
308            make_ws("ws-b"),
309            make_epoch(),
310            vec![make_change("shared.rs", ChangeKind::Modified, Some(b"b"))],
311        );
312
313        let result = partition_by_path(&[ps_a, ps_b]);
314
315        assert_eq!(result.unique_count(), 0);
316        assert_eq!(result.shared_count(), 1);
317        assert!(!result.is_conflict_free());
318
319        let (path, entries) = &result.shared[0];
320        assert_eq!(path, &PathBuf::from("shared.rs"));
321        assert_eq!(entries.len(), 2);
322        // Entries sorted by workspace ID.
323        assert_eq!(entries[0].workspace_id.as_str(), "ws-a");
324        assert_eq!(entries[1].workspace_id.as_str(), "ws-b");
325    }
326
327    // -- Mix of unique and shared --
328
329    #[test]
330    fn partition_mixed_unique_and_shared() {
331        let ps_a = PatchSet::new(
332            make_ws("ws-a"),
333            make_epoch(),
334            vec![
335                make_change("only-a.rs", ChangeKind::Added, Some(b"a")),
336                make_change("shared.rs", ChangeKind::Modified, Some(b"ver-a")),
337            ],
338        );
339        let ps_b = PatchSet::new(
340            make_ws("ws-b"),
341            make_epoch(),
342            vec![
343                make_change("only-b.rs", ChangeKind::Deleted, None),
344                make_change("shared.rs", ChangeKind::Modified, Some(b"ver-b")),
345            ],
346        );
347
348        let result = partition_by_path(&[ps_a, ps_b]);
349
350        assert_eq!(result.unique_count(), 2);
351        assert_eq!(result.shared_count(), 1);
352        assert_eq!(result.total_path_count(), 3);
353
354        // Unique paths sorted.
355        let unique_paths: Vec<_> = result.unique.iter().map(|(p, _)| p.clone()).collect();
356        assert_eq!(
357            unique_paths,
358            vec![PathBuf::from("only-a.rs"), PathBuf::from("only-b.rs")]
359        );
360
361        // Shared path.
362        let (shared_path, entries) = &result.shared[0];
363        assert_eq!(shared_path, &PathBuf::from("shared.rs"));
364        assert_eq!(entries.len(), 2);
365    }
366
367    // -- 3-way shared path --
368
369    #[test]
370    fn partition_three_way_shared() {
371        let ps_a = PatchSet::new(
372            make_ws("ws-a"),
373            make_epoch(),
374            vec![make_change("config.toml", ChangeKind::Modified, Some(b"a"))],
375        );
376        let ps_b = PatchSet::new(
377            make_ws("ws-b"),
378            make_epoch(),
379            vec![make_change("config.toml", ChangeKind::Modified, Some(b"b"))],
380        );
381        let ps_c = PatchSet::new(
382            make_ws("ws-c"),
383            make_epoch(),
384            vec![make_change("config.toml", ChangeKind::Modified, Some(b"c"))],
385        );
386
387        let result = partition_by_path(&[ps_a, ps_b, ps_c]);
388
389        assert_eq!(result.shared_count(), 1);
390        let (_, entries) = &result.shared[0];
391        assert_eq!(entries.len(), 3);
392        // Sorted by workspace ID.
393        assert_eq!(entries[0].workspace_id.as_str(), "ws-a");
394        assert_eq!(entries[1].workspace_id.as_str(), "ws-b");
395        assert_eq!(entries[2].workspace_id.as_str(), "ws-c");
396    }
397
398    // -- 5-way with disjoint and shared --
399
400    #[test]
401    fn partition_five_way_mixed() {
402        let workspaces: Vec<PatchSet> = (0..5)
403            .map(|i| {
404                let ws = make_ws(&format!("ws-{i}"));
405                let mut changes = vec![
406                    // Each workspace has a unique file.
407                    make_change(
408                        &format!("unique-{i}.rs"),
409                        ChangeKind::Added,
410                        Some(format!("fn ws_{i}() {{}}").as_bytes()),
411                    ),
412                ];
413                // All workspaces modify the shared file.
414                changes.push(make_change(
415                    "shared.rs",
416                    ChangeKind::Modified,
417                    Some(format!("version {i}").as_bytes()),
418                ));
419                PatchSet::new(ws, make_epoch(), changes)
420            })
421            .collect();
422
423        let result = partition_by_path(&workspaces);
424
425        assert_eq!(result.unique_count(), 5, "5 unique files");
426        assert_eq!(result.shared_count(), 1, "1 shared file");
427        assert_eq!(result.total_path_count(), 6);
428
429        let (_, entries) = &result.shared[0];
430        assert_eq!(entries.len(), 5, "5 workspaces modified shared.rs");
431    }
432
433    // -- Deletion entries --
434
435    #[test]
436    fn partition_preserves_deletion_info() {
437        let ps = PatchSet::new(
438            make_ws("ws-a"),
439            make_epoch(),
440            vec![make_change("gone.rs", ChangeKind::Deleted, None)],
441        );
442
443        let result = partition_by_path(&[ps]);
444
445        assert_eq!(result.unique_count(), 1);
446        let (path, entry) = &result.unique[0];
447        assert_eq!(path, &PathBuf::from("gone.rs"));
448        assert!(entry.is_deletion());
449        assert!(entry.content.is_none());
450    }
451
452    // -- Content preserved --
453
454    #[test]
455    fn partition_preserves_file_content() {
456        let content = b"hello world\nline 2\n";
457        let ps = PatchSet::new(
458            make_ws("ws-a"),
459            make_epoch(),
460            vec![make_change("hello.txt", ChangeKind::Added, Some(content))],
461        );
462
463        let result = partition_by_path(&[ps]);
464
465        let (_, entry) = &result.unique[0];
466        assert_eq!(entry.content.as_deref(), Some(content.as_ref()));
467    }
468
469    // -- Path ordering --
470
471    #[test]
472    fn partition_paths_are_lexicographic() {
473        let ps = PatchSet::new(
474            make_ws("ws-a"),
475            make_epoch(),
476            vec![
477                make_change("z.rs", ChangeKind::Added, Some(b"")),
478                make_change("a.rs", ChangeKind::Added, Some(b"")),
479                make_change("m/deep.rs", ChangeKind::Added, Some(b"")),
480                make_change("b.rs", ChangeKind::Added, Some(b"")),
481            ],
482        );
483
484        let result = partition_by_path(&[ps]);
485
486        let paths: Vec<_> = result.unique.iter().map(|(p, _)| p.clone()).collect();
487        assert_eq!(
488            paths,
489            vec![
490                PathBuf::from("a.rs"),
491                PathBuf::from("b.rs"),
492                PathBuf::from("m/deep.rs"),
493                PathBuf::from("z.rs"),
494            ]
495        );
496    }
497
498    // -- Modify/delete conflict --
499
500    #[test]
501    fn partition_modify_delete_is_shared() {
502        let ps_a = PatchSet::new(
503            make_ws("ws-a"),
504            make_epoch(),
505            vec![make_change("file.rs", ChangeKind::Modified, Some(b"new"))],
506        );
507        let ps_b = PatchSet::new(
508            make_ws("ws-b"),
509            make_epoch(),
510            vec![make_change("file.rs", ChangeKind::Deleted, None)],
511        );
512
513        let result = partition_by_path(&[ps_a, ps_b]);
514
515        assert_eq!(result.shared_count(), 1);
516        let (_, entries) = &result.shared[0];
517        assert_eq!(entries.len(), 2);
518        assert!(matches!(entries[0].kind, ChangeKind::Modified));
519        assert!(matches!(entries[1].kind, ChangeKind::Deleted));
520    }
521
522    // -- Add/add conflict --
523
524    #[test]
525    fn partition_add_add_is_shared() {
526        let ps_a = PatchSet::new(
527            make_ws("ws-a"),
528            make_epoch(),
529            vec![make_change("new.rs", ChangeKind::Added, Some(b"version a"))],
530        );
531        let ps_b = PatchSet::new(
532            make_ws("ws-b"),
533            make_epoch(),
534            vec![make_change("new.rs", ChangeKind::Added, Some(b"version b"))],
535        );
536
537        let result = partition_by_path(&[ps_a, ps_b]);
538
539        assert_eq!(result.unique_count(), 0);
540        assert_eq!(result.shared_count(), 1);
541        let (_, entries) = &result.shared[0];
542        assert_eq!(entries.len(), 2);
543        assert!(matches!(entries[0].kind, ChangeKind::Added));
544        assert!(matches!(entries[1].kind, ChangeKind::Added));
545    }
546
547    // -- Delete/delete is shared (but trivially resolvable) --
548
549    #[test]
550    fn partition_delete_delete_is_shared() {
551        let ps_a = PatchSet::new(
552            make_ws("ws-a"),
553            make_epoch(),
554            vec![make_change("old.rs", ChangeKind::Deleted, None)],
555        );
556        let ps_b = PatchSet::new(
557            make_ws("ws-b"),
558            make_epoch(),
559            vec![make_change("old.rs", ChangeKind::Deleted, None)],
560        );
561
562        let result = partition_by_path(&[ps_a, ps_b]);
563
564        assert_eq!(result.shared_count(), 1);
565        let (_, entries) = &result.shared[0];
566        assert_eq!(entries.len(), 2);
567        // Both deletions.
568        assert!(entries.iter().all(super::PathEntry::is_deletion));
569    }
570
571    // -- PathEntry --
572
573    #[test]
574    fn path_entry_is_deletion() {
575        let del = PathEntry::new(make_ws("ws"), ChangeKind::Deleted, None);
576        assert!(del.is_deletion());
577
578        let add = PathEntry::new(make_ws("ws"), ChangeKind::Added, Some(vec![]));
579        assert!(!add.is_deletion());
580    }
581
582    // -----------------------------------------------------------------------
583    // Phase 3: FileId + blob OID propagation through partition
584    // -----------------------------------------------------------------------
585
586    /// Helper: build a `FileChange` with identity metadata (`FileId` + blob OID).
587    fn make_change_with_identity(
588        path: &str,
589        kind: ChangeKind,
590        content: Option<&[u8]>,
591        file_id: crate::model::patch::FileId,
592        blob_hex: Option<&str>,
593    ) -> FileChange {
594        let blob = blob_hex.and_then(|h| crate::model::types::GitOid::new(h).ok());
595        FileChange::with_identity(
596            PathBuf::from(path),
597            kind,
598            content.map(<[u8]>::to_vec),
599            Some(file_id),
600            blob,
601        )
602    }
603
604    /// `FileId` and blob OID on a `FileChange` should be propagated into the
605    /// `PathEntry` that appears in the partition result.
606    #[test]
607    fn partition_propagates_file_id_and_blob_to_path_entry() {
608        use crate::model::patch::FileId;
609
610        let fid = FileId::new(0xdead_beef_cafe_babe_1234_5678_9abc_def0);
611        let blob_hex = "a".repeat(40);
612
613        let change = make_change_with_identity(
614            "src/lib.rs",
615            ChangeKind::Modified,
616            Some(b"fn lib() {}"),
617            fid,
618            Some(&blob_hex),
619        );
620        let ps = PatchSet::new(make_ws("ws-a"), make_epoch(), vec![change]);
621
622        let result = partition_by_path(&[ps]);
623
624        // The file was only modified by one workspace → it's a unique path.
625        assert_eq!(result.unique_count(), 1);
626        let (path, entry) = &result.unique[0];
627        assert_eq!(path, &PathBuf::from("src/lib.rs"));
628        assert_eq!(
629            entry.file_id,
630            Some(fid),
631            "FileId should propagate from FileChange to PathEntry"
632        );
633        assert!(
634            entry.blob.is_some(),
635            "blob OID should propagate from FileChange to PathEntry"
636        );
637    }
638
639    /// `FileId` and blob OID propagate correctly into shared (multi-workspace) entries.
640    #[test]
641    fn partition_propagates_identity_into_shared_entries() {
642        use crate::model::patch::FileId;
643
644        let fid_a = FileId::new(1);
645        let fid_b = FileId::new(2);
646        let blob_a = "a".repeat(40);
647        let blob_b = "b".repeat(40);
648
649        let change_a = make_change_with_identity(
650            "shared.rs",
651            ChangeKind::Modified,
652            Some(b"version A"),
653            fid_a,
654            Some(&blob_a),
655        );
656        let change_b = make_change_with_identity(
657            "shared.rs",
658            ChangeKind::Modified,
659            Some(b"version B"),
660            fid_b,
661            Some(&blob_b),
662        );
663
664        let ps_a = PatchSet::new(make_ws("ws-a"), make_epoch(), vec![change_a]);
665        let ps_b = PatchSet::new(make_ws("ws-b"), make_epoch(), vec![change_b]);
666
667        let result = partition_by_path(&[ps_a, ps_b]);
668        assert_eq!(result.shared_count(), 1);
669
670        let (_, entries) = &result.shared[0];
671        assert_eq!(entries.len(), 2);
672
673        // Find ws-a and ws-b entries.
674        let entry_a = entries
675            .iter()
676            .find(|e| e.workspace_id.as_str() == "ws-a")
677            .unwrap();
678        let entry_b = entries
679            .iter()
680            .find(|e| e.workspace_id.as_str() == "ws-b")
681            .unwrap();
682
683        assert_eq!(entry_a.file_id, Some(fid_a));
684        assert_eq!(entry_b.file_id, Some(fid_b));
685        assert!(entry_a.blob.is_some());
686        assert!(entry_b.blob.is_some());
687        // Blobs should differ (different content).
688        assert_ne!(entry_a.blob, entry_b.blob);
689    }
690
691    /// `FileChange` without identity (Phase 1 compat) results in None fields in `PathEntry`.
692    #[test]
693    fn partition_phase1_change_has_no_identity_in_path_entry() {
694        let change = make_change("old_style.rs", ChangeKind::Added, Some(b"fn old() {}"));
695        let ps = PatchSet::new(make_ws("ws-legacy"), make_epoch(), vec![change]);
696        let result = partition_by_path(&[ps]);
697
698        let (_, entry) = &result.unique[0];
699        assert!(
700            entry.file_id.is_none(),
701            "Phase 1 FileChange should produce PathEntry with no FileId"
702        );
703        assert!(
704            entry.blob.is_none(),
705            "Phase 1 FileChange should produce PathEntry with no blob OID"
706        );
707    }
708}