Skip to main content

jj_lib/
absorb.rs

1// Copyright 2024 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Algorithm to split changes in a single source commit into its most relevant
16//! ancestors, 'absorbing' them away.
17
18use std::cmp;
19use std::collections::HashMap;
20use std::ops::Range;
21use std::sync::Arc;
22
23use bstr::BString;
24use futures::StreamExt as _;
25use itertools::Itertools as _;
26use thiserror::Error;
27
28use crate::annotate::FileAnnotator;
29use crate::backend::BackendError;
30use crate::backend::BackendResult;
31use crate::backend::CommitId;
32use crate::backend::TreeValue;
33use crate::commit::Commit;
34use crate::commit::conflict_label_for_commits;
35use crate::conflicts::MaterializedFileValue;
36use crate::conflicts::MaterializedTreeValue;
37use crate::conflicts::materialized_diff_stream;
38use crate::copies::CopyRecords;
39use crate::diff::ContentDiff;
40use crate::diff::DiffHunkKind;
41use crate::matchers::Matcher;
42use crate::merge::Diff;
43use crate::merge::Merge;
44use crate::merged_tree::MergedTree;
45use crate::merged_tree_builder::MergedTreeBuilder;
46use crate::repo::MutableRepo;
47use crate::repo::Repo;
48use crate::repo_path::RepoPathBuf;
49use crate::revset::ResolvedRevsetExpression;
50use crate::revset::RevsetEvaluationError;
51
52/// The source commit to absorb into its ancestry.
53#[derive(Clone, Debug)]
54pub struct AbsorbSource {
55    commit: Commit,
56    parents: Vec<Commit>,
57    parent_tree: MergedTree,
58}
59
60impl AbsorbSource {
61    /// Create an absorb source from a single commit.
62    pub async fn from_commit(repo: &dyn Repo, commit: Commit) -> BackendResult<Self> {
63        let parents = commit.parents().await?;
64        let parent_tree = commit.parent_tree(repo).await?;
65        Ok(Self {
66            commit,
67            parents,
68            parent_tree,
69        })
70    }
71}
72
73/// Error splitting an absorb source into modified ancestry trees.
74#[derive(Debug, Error)]
75pub enum AbsorbError {
76    /// Error while contacting the Backend.
77    #[error(transparent)]
78    Backend(#[from] BackendError),
79    /// Error resolving commit ancestry.
80    #[error(transparent)]
81    RevsetEvaluation(#[from] RevsetEvaluationError),
82}
83
84/// An absorb 'plan' indicating which commits should be modified and what they
85/// should be modified to.
86#[derive(Default)]
87pub struct SelectedTrees {
88    /// Commits to be modified, to be passed to `absorb_hunks`.
89    pub target_commits: HashMap<CommitId, MergedTreeBuilder>,
90    /// Paths that were not absorbed for various error reasons.
91    pub skipped_paths: Vec<(RepoPathBuf, String)>,
92}
93
94/// Builds trees to be merged into destination commits by splitting source
95/// changes based on file annotation.
96pub async fn split_hunks_to_trees(
97    repo: &dyn Repo,
98    source: &AbsorbSource,
99    destinations: &Arc<ResolvedRevsetExpression>,
100    matcher: &dyn Matcher,
101) -> Result<SelectedTrees, AbsorbError> {
102    let mut selected_trees = SelectedTrees::default();
103
104    let left_tree = &source.parent_tree;
105    let right_tree = source.commit.tree();
106    // TODO: enable copy tracking if we add support for annotate and merge
107    let copy_records = CopyRecords::default();
108    let tree_diff = left_tree.diff_stream_with_copies(&right_tree, matcher, &copy_records);
109    let mut diff_stream = materialized_diff_stream(
110        repo.store(),
111        tree_diff,
112        Diff::new(left_tree.labels(), right_tree.labels()),
113    );
114    while let Some(entry) = diff_stream.next().await {
115        let left_path = entry.path.source();
116        let right_path = entry.path.target();
117        let values = entry.values?;
118        let (left_text, executable, copy_id) = match to_file_value(values.before) {
119            Ok(Some(mut value)) => (
120                value.read_all(left_path).await?,
121                value.executable,
122                value.copy_id,
123            ),
124            // New file should have no destinations
125            Ok(None) => continue,
126            Err(reason) => {
127                selected_trees
128                    .skipped_paths
129                    .push((left_path.to_owned(), reason));
130                continue;
131            }
132        };
133        let (right_text, deleted) = match to_file_value(values.after) {
134            Ok(Some(mut value)) => (value.read_all(right_path).await?, false),
135            Ok(None) => (vec![], true),
136            Err(reason) => {
137                selected_trees
138                    .skipped_paths
139                    .push((right_path.to_owned(), reason));
140                continue;
141            }
142        };
143
144        // Compute annotation of parent (= left) content to map right hunks
145        let mut annotator =
146            FileAnnotator::with_file_content(source.commit.id(), left_path, left_text.clone());
147        annotator.compute(repo, destinations)?;
148        let annotation = annotator.to_annotation();
149        let annotation_ranges = annotation
150            .compact_line_ranges()
151            .filter_map(|(commit_id, range)| Some((commit_id.ok()?, range)))
152            .collect_vec();
153        let diff = ContentDiff::by_line([&left_text, &right_text]);
154        let selected_ranges = split_file_hunks(&annotation_ranges, &diff);
155        // Build trees containing parent (= left) contents + selected hunks
156        for (&commit_id, ranges) in &selected_ranges {
157            let tree_builder = selected_trees
158                .target_commits
159                .entry(commit_id.clone())
160                .or_insert_with(|| MergedTreeBuilder::new(left_tree.clone()));
161            let new_text = combine_texts(&left_text, &right_text, ranges);
162            // Since changes to be absorbed are represented as diffs relative to
163            // the source parent, we can propagate file deletion only if the
164            // whole file content is deleted at a single destination commit.
165            let new_tree_value = if new_text.is_empty() && deleted {
166                Merge::absent()
167            } else {
168                let id = repo
169                    .store()
170                    .write_file(left_path, &mut new_text.as_slice())
171                    .await?;
172                Merge::normal(TreeValue::File {
173                    id,
174                    executable,
175                    copy_id: copy_id.clone(),
176                })
177            };
178            tree_builder.set_or_remove(left_path.to_owned(), new_tree_value);
179        }
180    }
181
182    Ok(selected_trees)
183}
184
/// A selected hunk: the byte range in the left text paired with the byte range
/// in the right text that replaces it.
type SelectedRange = (Range<usize>, Range<usize>);
186
187/// Maps `diff` hunks to commits based on the left `annotation_ranges`. The
188/// `annotation_ranges` should be compacted.
189fn split_file_hunks<'a>(
190    mut annotation_ranges: &[(&'a CommitId, Range<usize>)],
191    diff: &ContentDiff,
192) -> HashMap<&'a CommitId, Vec<SelectedRange>> {
193    debug_assert!(annotation_ranges.iter().all(|(_, range)| !range.is_empty()));
194    let mut selected_ranges: HashMap<&CommitId, Vec<_>> = HashMap::new();
195    let mut diff_hunk_ranges = diff
196        .hunk_ranges()
197        .filter(|hunk| hunk.kind == DiffHunkKind::Different);
198    while !annotation_ranges.is_empty() {
199        let Some(hunk) = diff_hunk_ranges.next() else {
200            break;
201        };
202        let [left_range, right_range]: &[_; 2] = hunk.ranges[..].try_into().unwrap();
203        assert!(!left_range.is_empty() || !right_range.is_empty());
204        if right_range.is_empty() {
205            // If the hunk is pure deletion, it can be mapped to multiple
206            // overlapped annotation ranges unambiguously.
207            let skip = annotation_ranges
208                .iter()
209                .take_while(|(_, range)| range.end <= left_range.start)
210                .count();
211            annotation_ranges = &annotation_ranges[skip..];
212            let pre_overlap = annotation_ranges
213                .iter()
214                .take_while(|(_, range)| range.end < left_range.end)
215                .count();
216            let maybe_overlapped_ranges = annotation_ranges.get(..pre_overlap + 1);
217            annotation_ranges = &annotation_ranges[pre_overlap..];
218            let Some(overlapped_ranges) = maybe_overlapped_ranges else {
219                continue;
220            };
221            // Ensure that the ranges are contiguous and include the start.
222            let all_covered = overlapped_ranges
223                .iter()
224                .try_fold(left_range.start, |prev_end, (_, cur)| {
225                    (cur.start <= prev_end).then_some(cur.end)
226                })
227                .inspect(|&last_end| assert!(left_range.end <= last_end))
228                .is_some();
229            if all_covered {
230                for (commit_id, cur_range) in overlapped_ranges {
231                    let start = cmp::max(cur_range.start, left_range.start);
232                    let end = cmp::min(cur_range.end, left_range.end);
233                    assert!(start < end);
234                    let selected = selected_ranges.entry(commit_id).or_default();
235                    selected.push((start..end, right_range.clone()));
236                }
237            }
238        } else {
239            // In other cases, the hunk should be included in an annotation
240            // range to map it unambiguously. Skip any pre-overlapped ranges.
241            let skip = annotation_ranges
242                .iter()
243                .take_while(|(_, range)| range.end < left_range.end)
244                .count();
245            annotation_ranges = &annotation_ranges[skip..];
246            let Some((commit_id, cur_range)) = annotation_ranges.first() else {
247                continue;
248            };
249            let contained = cur_range.start <= left_range.start && left_range.end <= cur_range.end;
250            // If the hunk is pure insertion, it can be mapped to two distinct
251            // annotation ranges, which is ambiguous.
252            let ambiguous = cur_range.end == left_range.start
253                && annotation_ranges
254                    .get(1)
255                    .is_some_and(|(_, next_range)| next_range.start == left_range.end);
256            if contained && !ambiguous {
257                let selected = selected_ranges.entry(commit_id).or_default();
258                selected.push((left_range.clone(), right_range.clone()));
259            }
260        }
261    }
262    selected_ranges
263}
264
265/// Constructs new text by replacing `text1` range with `text2` range for each
266/// selected `(range1, range2)` pairs.
267fn combine_texts(text1: &[u8], text2: &[u8], selected_ranges: &[SelectedRange]) -> BString {
268    itertools::chain!(
269        [(0..0, 0..0)],
270        selected_ranges.iter().cloned(),
271        [(text1.len()..text1.len(), text2.len()..text2.len())],
272    )
273    .tuple_windows()
274    // Copy unchanged hunk from text1 and current hunk from text2
275    .map(|((prev1, _), (cur1, cur2))| (prev1.end..cur1.start, cur2))
276    .flat_map(|(range1, range2)| [&text1[range1], &text2[range2]])
277    .collect()
278}
279
280/// Describes changes made by [`absorb_hunks()`].
281#[derive(Clone, Debug)]
282pub struct AbsorbStats {
283    /// Rewritten source commit which the absorbed hunks were removed, or `None`
284    /// if the source commit was abandoned or no hunks were moved.
285    pub rewritten_source: Option<Commit>,
286    /// Rewritten commits which the source hunks were absorbed into, in forward
287    /// topological order.
288    pub rewritten_destinations: Vec<Commit>,
289    /// Number of descendant commits which were rebased. The number of rewritten
290    /// destination commits are not included.
291    pub num_rebased: usize,
292}
293
294/// Merges selected trees into the specified commits. Abandons the source commit
295/// if it becomes discardable.
296pub async fn absorb_hunks(
297    repo: &mut MutableRepo,
298    source: &AbsorbSource,
299    mut selected_trees: HashMap<CommitId, MergedTreeBuilder>,
300) -> BackendResult<AbsorbStats> {
301    let mut rewritten_source = None;
302    let mut rewritten_destinations = Vec::new();
303    let mut num_rebased = 0;
304    let parents_label = conflict_label_for_commits(&source.parents);
305    let source_commit_label = source.commit.conflict_label();
306    // Rewrite commits in topological order so that descendant commits wouldn't
307    // be rewritten multiple times.
308    repo.transform_descendants(selected_trees.keys().cloned().collect(), async |rewriter| {
309        // Remove selected hunks from the source commit by reparent()
310        if rewriter.old_commit().id() == source.commit.id() {
311            let commit_builder = rewriter.reparent();
312            if commit_builder.is_discardable()? {
313                commit_builder.abandon();
314            } else {
315                rewritten_source = Some(commit_builder.write().await?);
316                num_rebased += 1;
317            }
318            return Ok(());
319        }
320        let Some(tree_builder) = selected_trees.remove(rewriter.old_commit().id()) else {
321            rewriter.rebase().await?.write().await?;
322            num_rebased += 1;
323            return Ok(());
324        };
325        // Merge hunks between source parent tree and selected tree
326        let selected_tree = tree_builder.write_tree().await?;
327        let destination_label = rewriter.old_commit().conflict_label();
328        let commit_builder = rewriter.rebase().await?;
329        let destination_tree = commit_builder.tree();
330        let new_tree = MergedTree::merge(Merge::from_vec(vec![
331            (
332                destination_tree,
333                format!("{destination_label} (absorb destination)"),
334            ),
335            (
336                source.parent_tree.clone(),
337                format!("{parents_label} (parents of absorbed revision)"),
338            ),
339            (
340                selected_tree,
341                format!("absorbed changes (from {source_commit_label})"),
342            ),
343        ]))
344        .await?;
345        let mut predecessors = commit_builder.predecessors().to_vec();
346        predecessors.push(source.commit.id().clone());
347        let new_commit = commit_builder
348            .set_tree(new_tree)
349            .set_predecessors(predecessors)
350            .write()
351            .await?;
352        rewritten_destinations.push(new_commit);
353        Ok(())
354    })
355    .await?;
356    Ok(AbsorbStats {
357        rewritten_source,
358        rewritten_destinations,
359        num_rebased,
360    })
361}
362
363fn to_file_value(value: MaterializedTreeValue) -> Result<Option<MaterializedFileValue>, String> {
364    match value {
365        MaterializedTreeValue::Absent => Ok(None), // New or deleted file
366        MaterializedTreeValue::AccessDenied(err) => Err(format!("Access is denied: {err}")),
367        MaterializedTreeValue::File(file) => Ok(Some(file)),
368        MaterializedTreeValue::Symlink { .. } => Err("Is a symlink".into()),
369        MaterializedTreeValue::FileConflict(_) | MaterializedTreeValue::OtherConflict { .. } => {
370            Err("Is a conflict".into())
371        }
372        MaterializedTreeValue::GitSubmodule(_) => Err("Is a Git submodule".into()),
373        MaterializedTreeValue::Tree(_) => panic!("diff should not contain trees"),
374    }
375}
376
377#[cfg(test)]
378mod tests {
379    use maplit::hashmap;
380
381    use super::*;
382
383    #[test]
384    fn test_split_file_hunks_empty_or_single_line() {
385        let commit_id1 = &CommitId::from_hex("111111");
386
387        // unchanged
388        assert_eq!(
389            split_file_hunks(&[], &ContentDiff::by_line(["", ""])),
390            hashmap! {}
391        );
392
393        // insert single line
394        assert_eq!(
395            split_file_hunks(&[], &ContentDiff::by_line(["", "2X\n"])),
396            hashmap! {}
397        );
398        // delete single line
399        assert_eq!(
400            split_file_hunks(&[(commit_id1, 0..3)], &ContentDiff::by_line(["1a\n", ""])),
401            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
402        );
403        // modify single line
404        assert_eq!(
405            split_file_hunks(
406                &[(commit_id1, 0..3)],
407                &ContentDiff::by_line(["1a\n", "1AA\n"])
408            ),
409            hashmap! { commit_id1 => vec![(0..3, 0..4)] }
410        );
411    }
412
413    #[test]
414    fn test_split_file_hunks_single_range() {
415        let commit_id1 = &CommitId::from_hex("111111");
416
417        // insert first, middle, and last lines
418        assert_eq!(
419            split_file_hunks(
420                &[(commit_id1, 0..6)],
421                &ContentDiff::by_line(["1a\n1b\n", "1X\n1a\n1Y\n1b\n1Z\n"])
422            ),
423            hashmap! {
424                commit_id1 => vec![(0..0, 0..3), (3..3, 6..9), (6..6, 12..15)],
425            }
426        );
427        // delete first, middle, and last lines
428        assert_eq!(
429            split_file_hunks(
430                &[(commit_id1, 0..15)],
431                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n1e\n1f\n", "1b\n1d\n1f\n"])
432            ),
433            hashmap! {
434                commit_id1 => vec![(0..3, 0..0), (6..9, 3..3), (12..15, 6..6)],
435            }
436        );
437        // modify non-contiguous lines
438        assert_eq!(
439            split_file_hunks(
440                &[(commit_id1, 0..12)],
441                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n", "1A\n1b\n1C\n1d\n"])
442            ),
443            hashmap! { commit_id1 => vec![(0..3, 0..3), (6..9, 6..9)] }
444        );
445    }
446
447    #[test]
448    fn test_split_file_hunks_contiguous_ranges_insert() {
449        let commit_id1 = &CommitId::from_hex("111111");
450        let commit_id2 = &CommitId::from_hex("222222");
451
452        // insert first line
453        assert_eq!(
454            split_file_hunks(
455                &[(commit_id1, 0..6), (commit_id2, 6..12)],
456                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1X\n1a\n1b\n2a\n2b\n"])
457            ),
458            hashmap! { commit_id1 => vec![(0..0, 0..3)] }
459        );
460        // insert middle line to first range
461        assert_eq!(
462            split_file_hunks(
463                &[(commit_id1, 0..6), (commit_id2, 6..12)],
464                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1X\n1b\n2a\n2b\n"])
465            ),
466            hashmap! { commit_id1 => vec![(3..3, 3..6)] }
467        );
468        // insert middle line between ranges (ambiguous)
469        assert_eq!(
470            split_file_hunks(
471                &[(commit_id1, 0..6), (commit_id2, 6..12)],
472                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n3X\n2a\n2b\n"])
473            ),
474            hashmap! {}
475        );
476        // insert middle line to second range
477        assert_eq!(
478            split_file_hunks(
479                &[(commit_id1, 0..6), (commit_id2, 6..12)],
480                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2X\n2b\n"])
481            ),
482            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
483        );
484        // insert last line
485        assert_eq!(
486            split_file_hunks(
487                &[(commit_id1, 0..6), (commit_id2, 6..12)],
488                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2b\n2X\n"])
489            ),
490            hashmap! { commit_id2 => vec![(12..12, 12..15)] }
491        );
492    }
493
494    #[test]
495    fn test_split_file_hunks_contiguous_ranges_delete() {
496        let commit_id1 = &CommitId::from_hex("111111");
497        let commit_id2 = &CommitId::from_hex("222222");
498
499        // delete first line
500        assert_eq!(
501            split_file_hunks(
502                &[(commit_id1, 0..6), (commit_id2, 6..12)],
503                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n2b\n"])
504            ),
505            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
506        );
507        // delete middle line from first range
508        assert_eq!(
509            split_file_hunks(
510                &[(commit_id1, 0..6), (commit_id2, 6..12)],
511                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2a\n2b\n"])
512            ),
513            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
514        );
515        // delete middle line from second range
516        assert_eq!(
517            split_file_hunks(
518                &[(commit_id1, 0..6), (commit_id2, 6..12)],
519                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2b\n"])
520            ),
521            hashmap! { commit_id2 => vec![(6..9, 6..6)] }
522        );
523        // delete last line
524        assert_eq!(
525            split_file_hunks(
526                &[(commit_id1, 0..6), (commit_id2, 6..12)],
527                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n"])
528            ),
529            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
530        );
531        // delete first and last lines
532        assert_eq!(
533            split_file_hunks(
534                &[(commit_id1, 0..6), (commit_id2, 6..12)],
535                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n"])
536            ),
537            hashmap! {
538                commit_id1 => vec![(0..3, 0..0)],
539                commit_id2 => vec![(9..12, 6..6)],
540            }
541        );
542
543        // delete across ranges (split first annotation range)
544        assert_eq!(
545            split_file_hunks(
546                &[(commit_id1, 0..6), (commit_id2, 6..12)],
547                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n"])
548            ),
549            hashmap! {
550                commit_id1 => vec![(3..6, 3..3)],
551                commit_id2 => vec![(6..12, 3..3)],
552            }
553        );
554        // delete middle lines across ranges (split both annotation ranges)
555        assert_eq!(
556            split_file_hunks(
557                &[(commit_id1, 0..6), (commit_id2, 6..12)],
558                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2b\n"])
559            ),
560            hashmap! {
561                commit_id1 => vec![(3..6, 3..3)],
562                commit_id2 => vec![(6..9, 3..3)],
563            }
564        );
565        // delete across ranges (split second annotation range)
566        assert_eq!(
567            split_file_hunks(
568                &[(commit_id1, 0..6), (commit_id2, 6..12)],
569                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "2b\n"])
570            ),
571            hashmap! {
572                commit_id1 => vec![(0..6, 0..0)],
573                commit_id2 => vec![(6..9, 0..0)],
574            }
575        );
576
577        // delete all
578        assert_eq!(
579            split_file_hunks(
580                &[(commit_id1, 0..6), (commit_id2, 6..12)],
581                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", ""])
582            ),
583            hashmap! {
584                commit_id1 => vec![(0..6, 0..0)],
585                commit_id2 => vec![(6..12, 0..0)],
586            }
587        );
588    }
589
590    #[test]
591    fn test_split_file_hunks_contiguous_ranges_modify() {
592        let commit_id1 = &CommitId::from_hex("111111");
593        let commit_id2 = &CommitId::from_hex("222222");
594
595        // modify first line
596        assert_eq!(
597            split_file_hunks(
598                &[(commit_id1, 0..6), (commit_id2, 6..12)],
599                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2b\n"])
600            ),
601            hashmap! { commit_id1 => vec![(0..3, 0..3)] }
602        );
603        // modify middle line of first range
604        assert_eq!(
605            split_file_hunks(
606                &[(commit_id1, 0..6), (commit_id2, 6..12)],
607                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2a\n2b\n"])
608            ),
609            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
610        );
611        // modify middle lines of both ranges (ambiguous)
612        // ('hg absorb' accepts this)
613        assert_eq!(
614            split_file_hunks(
615                &[(commit_id1, 0..6), (commit_id2, 6..12)],
616                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2A\n2b\n"])
617            ),
618            hashmap! {}
619        );
620        // modify middle line of second range
621        assert_eq!(
622            split_file_hunks(
623                &[(commit_id1, 0..6), (commit_id2, 6..12)],
624                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2b\n"])
625            ),
626            hashmap! { commit_id2 => vec![(6..9, 6..9)] }
627        );
628        // modify last line
629        assert_eq!(
630            split_file_hunks(
631                &[(commit_id1, 0..6), (commit_id2, 6..12)],
632                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2B\n"])
633            ),
634            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
635        );
636        // modify first and last lines
637        assert_eq!(
638            split_file_hunks(
639                &[(commit_id1, 0..6), (commit_id2, 6..12)],
640                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2B\n"])
641            ),
642            hashmap! {
643                commit_id1 => vec![(0..3, 0..3)],
644                commit_id2 => vec![(9..12, 9..12)],
645            }
646        );
647    }
648
649    #[test]
650    fn test_split_file_hunks_contiguous_ranges_modify_insert() {
651        let commit_id1 = &CommitId::from_hex("111111");
652        let commit_id2 = &CommitId::from_hex("222222");
653
654        // modify first range, insert adjacent middle line
655        assert_eq!(
656            split_file_hunks(
657                &[(commit_id1, 0..6), (commit_id2, 6..12)],
658                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1B\n1X\n2a\n2b\n"])
659            ),
660            hashmap! { commit_id1 => vec![(0..6, 0..9)] }
661        );
662        // modify second range, insert adjacent middle line
663        assert_eq!(
664            split_file_hunks(
665                &[(commit_id1, 0..6), (commit_id2, 6..12)],
666                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2X\n2A\n2B\n"])
667            ),
668            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
669        );
670        // modify second range, insert last line
671        assert_eq!(
672            split_file_hunks(
673                &[(commit_id1, 0..6), (commit_id2, 6..12)],
674                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2B\n2X\n"])
675            ),
676            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
677        );
678        // modify first and last lines (unambiguous), insert middle line between
679        // ranges (ambiguous)
680        assert_eq!(
681            split_file_hunks(
682                &[(commit_id1, 0..6), (commit_id2, 6..12)],
683                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n3X\n2a\n2B\n"])
684            ),
685            hashmap! {
686                commit_id1 => vec![(0..3, 0..3)],
687                commit_id2 => vec![(9..12, 12..15)],
688            }
689        );
690    }
691
692    #[test]
693    fn test_split_file_hunks_contiguous_ranges_modify_delete() {
694        let commit_id1 = &CommitId::from_hex("111111");
695        let commit_id2 = &CommitId::from_hex("222222");
696
697        // modify first line, delete adjacent middle line
698        assert_eq!(
699            split_file_hunks(
700                &[(commit_id1, 0..6), (commit_id2, 6..12)],
701                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2b\n"])
702            ),
703            hashmap! { commit_id1 => vec![(0..6, 0..3)] }
704        );
705        // modify last line, delete adjacent middle line
706        assert_eq!(
707            split_file_hunks(
708                &[(commit_id1, 0..6), (commit_id2, 6..12)],
709                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2B\n"])
710            ),
711            hashmap! { commit_id2 => vec![(6..12, 6..9)] }
712        );
713        // modify first and last lines, delete middle line from first range
714        assert_eq!(
715            split_file_hunks(
716                &[(commit_id1, 0..6), (commit_id2, 6..12)],
717                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2B\n"])
718            ),
719            hashmap! {
720                commit_id1 => vec![(0..6, 0..3)],
721                commit_id2 => vec![(9..12, 6..9)],
722            }
723        );
724        // modify first and last lines, delete middle line from second range
725        assert_eq!(
726            split_file_hunks(
727                &[(commit_id1, 0..6), (commit_id2, 6..12)],
728                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2B\n"])
729            ),
730            hashmap! {
731                commit_id1 => vec![(0..3, 0..3)],
732                commit_id2 => vec![(6..12, 6..9)],
733            }
734        );
735        // modify middle line, delete adjacent middle line (ambiguous)
736        assert_eq!(
737            split_file_hunks(
738                &[(commit_id1, 0..6), (commit_id2, 6..12)],
739                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2b\n"])
740            ),
741            hashmap! {}
742        );
743    }
744
745    #[test]
746    fn test_split_file_hunks_non_contiguous_ranges_insert() {
747        let commit_id1 = &CommitId::from_hex("111111");
748        let commit_id2 = &CommitId::from_hex("222222");
749
750        // insert middle line to first range
751        assert_eq!(
752            split_file_hunks(
753                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
754                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2a\n2b\n"])
755            ),
756            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
757        );
758        // insert middle line to second range
759        assert_eq!(
760            split_file_hunks(
761                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
762                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2X\n2a\n2b\n"])
763            ),
764            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
765        );
766        // insert middle lines to both ranges
767        assert_eq!(
768            split_file_hunks(
769                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
770                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2X\n2a\n2b\n"])
771            ),
772            hashmap! {
773                commit_id1 => vec![(6..6, 6..9)],
774                commit_id2 => vec![(9..9, 12..15)],
775            }
776        );
777    }
778
779    #[test]
780    fn test_split_file_hunks_non_contiguous_ranges_insert_modify_masked() {
781        let commit_id1 = &CommitId::from_hex("111111");
782        let commit_id2 = &CommitId::from_hex("222222");
783
784        // insert middle line to first range, modify masked line (ambiguous)
785        assert_eq!(
786            split_file_hunks(
787                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
788                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2a\n2b\n"])
789            ),
790            hashmap! {}
791        );
792        // insert middle line to second range, modify masked line (ambiguous)
793        assert_eq!(
794            split_file_hunks(
795                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
796                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2X\n2a\n2b\n"])
797            ),
798            hashmap! {}
799        );
800        // insert middle lines to both ranges, modify masked line (ambiguous)
801        assert_eq!(
802            split_file_hunks(
803                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
804                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2X\n2a\n2b\n"])
805            ),
806            hashmap! {}
807        );
808    }
809
810    #[test]
811    fn test_split_file_hunks_non_contiguous_ranges_delete() {
812        let commit_id1 = &CommitId::from_hex("111111");
813        let commit_id2 = &CommitId::from_hex("222222");
814
815        // delete middle line from first range
816        assert_eq!(
817            split_file_hunks(
818                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
819                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2a\n2b\n"])
820            ),
821            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
822        );
823        // delete middle line from second range
824        assert_eq!(
825            split_file_hunks(
826                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
827                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2b\n"])
828            ),
829            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
830        );
831        // delete middle lines from both ranges
832        assert_eq!(
833            split_file_hunks(
834                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
835                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2b\n"])
836            ),
837            hashmap! {
838                commit_id1 => vec![(3..6, 3..3)],
839                commit_id2 => vec![(9..12, 6..6)],
840            }
841        );
842    }
843
844    #[test]
845    fn test_split_file_hunks_non_contiguous_ranges_delete_modify_masked() {
846        let commit_id1 = &CommitId::from_hex("111111");
847        let commit_id2 = &CommitId::from_hex("222222");
848
849        // delete middle line from first range, modify masked line (ambiguous)
850        assert_eq!(
851            split_file_hunks(
852                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
853                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2a\n2b\n"])
854            ),
855            hashmap! {}
856        );
857        // delete middle line from second range, modify masked line (ambiguous)
858        assert_eq!(
859            split_file_hunks(
860                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
861                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2b\n"])
862            ),
863            hashmap! {}
864        );
865        // delete middle lines from both ranges, modify masked line (ambiguous)
866        assert_eq!(
867            split_file_hunks(
868                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
869                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2b\n"])
870            ),
871            hashmap! {}
872        );
873    }
874
875    #[test]
876    fn test_split_file_hunks_non_contiguous_ranges_delete_delete_masked() {
877        let commit_id1 = &CommitId::from_hex("111111");
878        let commit_id2 = &CommitId::from_hex("222222");
879
880        // 'hg absorb' accepts these, but it seems better to reject them as
881        // ambiguous. Masked lines cannot be deleted.
882
883        // delete middle line from first range, delete masked line (ambiguous)
884        assert_eq!(
885            split_file_hunks(
886                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
887                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2a\n2b\n"])
888            ),
889            hashmap! {}
890        );
891        // delete middle line from second range, delete masked line (ambiguous)
892        assert_eq!(
893            split_file_hunks(
894                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
895                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n2b\n"])
896            ),
897            hashmap! {}
898        );
899        // delete middle lines from both ranges, delete masked line (ambiguous)
900        assert_eq!(
901            split_file_hunks(
902                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
903                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2b\n"])
904            ),
905            hashmap! {}
906        );
907    }
908
909    #[test]
910    fn test_split_file_hunks_non_contiguous_ranges_modify() {
911        let commit_id1 = &CommitId::from_hex("111111");
912        let commit_id2 = &CommitId::from_hex("222222");
913
914        // modify middle line of first range
915        assert_eq!(
916            split_file_hunks(
917                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
918                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2a\n2b\n"])
919            ),
920            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
921        );
922        // modify middle line of second range
923        assert_eq!(
924            split_file_hunks(
925                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
926                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2A\n2b\n"])
927            ),
928            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
929        );
930        // modify middle lines of both ranges
931        assert_eq!(
932            split_file_hunks(
933                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
934                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2A\n2b\n"])
935            ),
936            hashmap! {
937                commit_id1 => vec![(3..6, 3..6)],
938                commit_id2 => vec![(9..12, 9..12)],
939            }
940        );
941    }
942
943    #[test]
944    fn test_split_file_hunks_non_contiguous_ranges_modify_modify_masked() {
945        let commit_id1 = &CommitId::from_hex("111111");
946        let commit_id2 = &CommitId::from_hex("222222");
947
948        // modify middle line of first range, modify masked line (ambiguous)
949        assert_eq!(
950            split_file_hunks(
951                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
952                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2a\n2b\n"])
953            ),
954            hashmap! {}
955        );
956        // modify middle line of second range, modify masked line (ambiguous)
957        assert_eq!(
958            split_file_hunks(
959                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
960                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2A\n2b\n"])
961            ),
962            hashmap! {}
963        );
964        // modify middle lines to both ranges, modify masked line (ambiguous)
965        assert_eq!(
966            split_file_hunks(
967                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
968                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2A\n2b\n"])
969            ),
970            hashmap! {}
971        );
972    }
973
974    #[test]
975    fn test_split_file_hunks_non_contiguous_tail_range_insert() {
976        let commit_id1 = &CommitId::from_hex("111111");
977
978        // insert middle line to range
979        assert_eq!(
980            split_file_hunks(
981                &[(commit_id1, 0..6) /* , 6..9 */],
982                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0a\n"])
983            ),
984            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
985        );
986    }
987
988    #[test]
989    fn test_split_file_hunks_non_contiguous_tail_range_insert_modify_masked() {
990        let commit_id1 = &CommitId::from_hex("111111");
991
992        // insert middle line to range, modify masked line (ambiguous)
993        assert_eq!(
994            split_file_hunks(
995                &[(commit_id1, 0..6) /* , 6..9 */],
996                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0A\n"])
997            ),
998            hashmap! {}
999        );
1000    }
1001
1002    #[test]
1003    fn test_split_file_hunks_non_contiguous_tail_range_delete() {
1004        let commit_id1 = &CommitId::from_hex("111111");
1005
1006        // delete middle line from range
1007        assert_eq!(
1008            split_file_hunks(
1009                &[(commit_id1, 0..6) /* , 6..9 */],
1010                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0a\n"])
1011            ),
1012            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
1013        );
1014        // delete all lines from range
1015        assert_eq!(
1016            split_file_hunks(
1017                &[(commit_id1, 0..6) /* , 6..9 */],
1018                &ContentDiff::by_line(["1a\n1b\n0a\n", "0a\n"])
1019            ),
1020            hashmap! { commit_id1 => vec![(0..6, 0..0)] }
1021        );
1022    }
1023
1024    #[test]
1025    fn test_split_file_hunks_non_contiguous_tail_range_delete_modify_masked() {
1026        let commit_id1 = &CommitId::from_hex("111111");
1027
1028        // delete middle line from range, modify masked line (ambiguous)
1029        assert_eq!(
1030            split_file_hunks(
1031                &[(commit_id1, 0..6) /* , 6..9 */],
1032                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0A\n"])
1033            ),
1034            hashmap! {}
1035        );
1036        // delete all lines from range, modify masked line (ambiguous)
1037        assert_eq!(
1038            split_file_hunks(
1039                &[(commit_id1, 0..6) /* , 6..9 */],
1040                &ContentDiff::by_line(["1a\n1b\n0a\n", "0A\n"])
1041            ),
1042            hashmap! {}
1043        );
1044    }
1045
1046    #[test]
1047    fn test_split_file_hunks_non_contiguous_tail_range_delete_delete_masked() {
1048        let commit_id1 = &CommitId::from_hex("111111");
1049
1050        // 'hg absorb' accepts these, but it seems better to reject them as
1051        // ambiguous. Masked lines cannot be deleted.
1052
1053        // delete middle line from range, delete masked line (ambiguous)
1054        assert_eq!(
1055            split_file_hunks(
1056                &[(commit_id1, 0..6) /* , 6..9 */],
1057                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n"])
1058            ),
1059            hashmap! {}
1060        );
1061        // delete all lines from range, delete masked line (ambiguous)
1062        assert_eq!(
1063            split_file_hunks(
1064                &[(commit_id1, 0..6) /* , 6..9 */],
1065                &ContentDiff::by_line(["1a\n1b\n0a\n", ""])
1066            ),
1067            hashmap! {}
1068        );
1069    }
1070
1071    #[test]
1072    fn test_split_file_hunks_non_contiguous_tail_range_modify() {
1073        let commit_id1 = &CommitId::from_hex("111111");
1074
1075        // modify middle line of range
1076        assert_eq!(
1077            split_file_hunks(
1078                &[(commit_id1, 0..6) /* , 6..9 */],
1079                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0a\n"])
1080            ),
1081            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
1082        );
1083    }
1084
1085    #[test]
1086    fn test_split_file_hunks_non_contiguous_tail_range_modify_modify_masked() {
1087        let commit_id1 = &CommitId::from_hex("111111");
1088
1089        // modify middle line of range, modify masked line (ambiguous)
1090        assert_eq!(
1091            split_file_hunks(
1092                &[(commit_id1, 0..6) /* , 6..9 */],
1093                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0A\n"])
1094            ),
1095            hashmap! {}
1096        );
1097    }
1098
1099    #[test]
1100    fn test_split_file_hunks_multiple_edits() {
1101        let commit_id1 = &CommitId::from_hex("111111");
1102        let commit_id2 = &CommitId::from_hex("222222");
1103        let commit_id3 = &CommitId::from_hex("333333");
1104
1105        assert_eq!(
1106            split_file_hunks(
1107                &[
1108                    (commit_id1, 0..3),   // 1a       => 1A
1109                    (commit_id2, 3..6),   // 2a       => 2a
1110                    (commit_id1, 6..15),  // 1b 1c 1d => 1B 1d
1111                    (commit_id3, 15..21), // 3a 3b    => 3X 3A 3b 3Y
1112                ],
1113                &ContentDiff::by_line([
1114                    "1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1115                    "1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n"
1116                ])
1117            ),
1118            hashmap! {
1119                commit_id1 => vec![(0..3, 0..3), (6..12, 6..9)],
1120                commit_id3 => vec![(15..18, 12..18), (21..21, 21..24)],
1121            }
1122        );
1123    }
1124
1125    #[test]
1126    fn test_combine_texts() {
1127        assert_eq!(combine_texts(b"", b"", &[]), "");
1128        assert_eq!(combine_texts(b"foo", b"bar", &[]), "foo");
1129        assert_eq!(combine_texts(b"foo", b"bar", &[(0..3, 0..3)]), "bar");
1130
1131        assert_eq!(
1132            combine_texts(
1133                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1134                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1135                &[(0..3, 0..3), (6..12, 6..9)]
1136            ),
1137            "1A\n2a\n1B\n1d\n3a\n3b\n"
1138        );
1139        assert_eq!(
1140            combine_texts(
1141                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1142                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1143                &[(15..18, 12..18), (21..21, 21..24)]
1144            ),
1145            "1a\n2a\n1b\n1c\n1d\n3X\n3A\n3b\n3Y\n"
1146        );
1147    }
1148}