jj_lib/
absorb.rs

1// Copyright 2024 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Algorithm to split changes in a single source commit into its most relevant
16//! ancestors, 'absorbing' them away.
17
18use std::cmp;
19use std::collections::HashMap;
20use std::ops::Range;
21use std::sync::Arc;
22
23use bstr::BString;
24use futures::StreamExt as _;
25use itertools::Itertools as _;
26use pollster::FutureExt as _;
27use thiserror::Error;
28
29use crate::annotate::FileAnnotator;
30use crate::backend::BackendError;
31use crate::backend::BackendResult;
32use crate::backend::CommitId;
33use crate::backend::TreeValue;
34use crate::commit::Commit;
35use crate::commit::conflict_label_for_commits;
36use crate::conflicts::MaterializedFileValue;
37use crate::conflicts::MaterializedTreeValue;
38use crate::conflicts::materialized_diff_stream;
39use crate::copies::CopyRecords;
40use crate::diff::ContentDiff;
41use crate::diff::DiffHunkKind;
42use crate::matchers::Matcher;
43use crate::merge::Diff;
44use crate::merge::Merge;
45use crate::merged_tree::MergedTree;
46use crate::merged_tree::MergedTreeBuilder;
47use crate::repo::MutableRepo;
48use crate::repo::Repo;
49use crate::repo_path::RepoPathBuf;
50use crate::revset::ResolvedRevsetExpression;
51use crate::revset::RevsetEvaluationError;
52
53/// The source commit to absorb into its ancestry.
54#[derive(Clone, Debug)]
55pub struct AbsorbSource {
56    commit: Commit,
57    parents: Vec<Commit>,
58    parent_tree: MergedTree,
59}
60
61impl AbsorbSource {
62    /// Create an absorb source from a single commit.
63    pub fn from_commit(repo: &dyn Repo, commit: Commit) -> BackendResult<Self> {
64        let parents = commit.parents().try_collect()?;
65        let parent_tree = commit.parent_tree(repo)?;
66        Ok(Self {
67            commit,
68            parents,
69            parent_tree,
70        })
71    }
72}
73
74/// Error splitting an absorb source into modified ancestry trees.
75#[derive(Debug, Error)]
76pub enum AbsorbError {
77    /// Error while contacting the Backend.
78    #[error(transparent)]
79    Backend(#[from] BackendError),
80    /// Error resolving commit ancestry.
81    #[error(transparent)]
82    RevsetEvaluation(#[from] RevsetEvaluationError),
83}
84
85/// An absorb 'plan' indicating which commits should be modified and what they
86/// should be modified to.
87#[derive(Default)]
88pub struct SelectedTrees {
89    /// Commits to be modified, to be passed to `absorb_hunks`.
90    pub target_commits: HashMap<CommitId, MergedTreeBuilder>,
91    /// Paths that were not absorbed for various error reasons.
92    pub skipped_paths: Vec<(RepoPathBuf, String)>,
93}
94
95/// Builds trees to be merged into destination commits by splitting source
96/// changes based on file annotation.
97pub async fn split_hunks_to_trees(
98    repo: &dyn Repo,
99    source: &AbsorbSource,
100    destinations: &Arc<ResolvedRevsetExpression>,
101    matcher: &dyn Matcher,
102) -> Result<SelectedTrees, AbsorbError> {
103    let mut selected_trees = SelectedTrees::default();
104
105    let left_tree = &source.parent_tree;
106    let right_tree = source.commit.tree();
107    // TODO: enable copy tracking if we add support for annotate and merge
108    let copy_records = CopyRecords::default();
109    let tree_diff = left_tree.diff_stream_with_copies(&right_tree, matcher, &copy_records);
110    let mut diff_stream = materialized_diff_stream(
111        repo.store(),
112        tree_diff,
113        Diff::new(left_tree.labels(), right_tree.labels()),
114    );
115    while let Some(entry) = diff_stream.next().await {
116        let left_path = entry.path.source();
117        let right_path = entry.path.target();
118        let values = entry.values?;
119        let (left_text, executable, copy_id) = match to_file_value(values.before) {
120            Ok(Some(mut value)) => (
121                value.read_all(left_path).await?,
122                value.executable,
123                value.copy_id,
124            ),
125            // New file should have no destinations
126            Ok(None) => continue,
127            Err(reason) => {
128                selected_trees
129                    .skipped_paths
130                    .push((left_path.to_owned(), reason));
131                continue;
132            }
133        };
134        let (right_text, deleted) = match to_file_value(values.after) {
135            Ok(Some(mut value)) => (value.read_all(right_path).await?, false),
136            Ok(None) => (vec![], true),
137            Err(reason) => {
138                selected_trees
139                    .skipped_paths
140                    .push((right_path.to_owned(), reason));
141                continue;
142            }
143        };
144
145        // Compute annotation of parent (= left) content to map right hunks
146        let mut annotator =
147            FileAnnotator::with_file_content(source.commit.id(), left_path, left_text.clone());
148        annotator.compute(repo, destinations)?;
149        let annotation = annotator.to_annotation();
150        let annotation_ranges = annotation
151            .compact_line_ranges()
152            .filter_map(|(commit_id, range)| Some((commit_id.ok()?, range)))
153            .collect_vec();
154        let diff = ContentDiff::by_line([&left_text, &right_text]);
155        let selected_ranges = split_file_hunks(&annotation_ranges, &diff);
156        // Build trees containing parent (= left) contents + selected hunks
157        for (&commit_id, ranges) in &selected_ranges {
158            let tree_builder = selected_trees
159                .target_commits
160                .entry(commit_id.clone())
161                .or_insert_with(|| MergedTreeBuilder::new(left_tree.clone()));
162            let new_text = combine_texts(&left_text, &right_text, ranges);
163            // Since changes to be absorbed are represented as diffs relative to
164            // the source parent, we can propagate file deletion only if the
165            // whole file content is deleted at a single destination commit.
166            let new_tree_value = if new_text.is_empty() && deleted {
167                Merge::absent()
168            } else {
169                let id = repo
170                    .store()
171                    .write_file(left_path, &mut new_text.as_slice())
172                    .await?;
173                Merge::normal(TreeValue::File {
174                    id,
175                    executable,
176                    copy_id: copy_id.clone(),
177                })
178            };
179            tree_builder.set_or_remove(left_path.to_owned(), new_tree_value);
180        }
181    }
182
183    Ok(selected_trees)
184}
185
186type SelectedRange = (Range<usize>, Range<usize>);
187
188/// Maps `diff` hunks to commits based on the left `annotation_ranges`. The
189/// `annotation_ranges` should be compacted.
190fn split_file_hunks<'a>(
191    mut annotation_ranges: &[(&'a CommitId, Range<usize>)],
192    diff: &ContentDiff,
193) -> HashMap<&'a CommitId, Vec<SelectedRange>> {
194    debug_assert!(annotation_ranges.iter().all(|(_, range)| !range.is_empty()));
195    let mut selected_ranges: HashMap<&CommitId, Vec<_>> = HashMap::new();
196    let mut diff_hunk_ranges = diff
197        .hunk_ranges()
198        .filter(|hunk| hunk.kind == DiffHunkKind::Different);
199    while !annotation_ranges.is_empty() {
200        let Some(hunk) = diff_hunk_ranges.next() else {
201            break;
202        };
203        let [left_range, right_range]: &[_; 2] = hunk.ranges[..].try_into().unwrap();
204        assert!(!left_range.is_empty() || !right_range.is_empty());
205        if right_range.is_empty() {
206            // If the hunk is pure deletion, it can be mapped to multiple
207            // overlapped annotation ranges unambiguously.
208            let skip = annotation_ranges
209                .iter()
210                .take_while(|(_, range)| range.end <= left_range.start)
211                .count();
212            annotation_ranges = &annotation_ranges[skip..];
213            let pre_overlap = annotation_ranges
214                .iter()
215                .take_while(|(_, range)| range.end < left_range.end)
216                .count();
217            let maybe_overlapped_ranges = annotation_ranges.get(..pre_overlap + 1);
218            annotation_ranges = &annotation_ranges[pre_overlap..];
219            let Some(overlapped_ranges) = maybe_overlapped_ranges else {
220                continue;
221            };
222            // Ensure that the ranges are contiguous and include the start.
223            let all_covered = overlapped_ranges
224                .iter()
225                .try_fold(left_range.start, |prev_end, (_, cur)| {
226                    (cur.start <= prev_end).then_some(cur.end)
227                })
228                .inspect(|&last_end| assert!(left_range.end <= last_end))
229                .is_some();
230            if all_covered {
231                for (commit_id, cur_range) in overlapped_ranges {
232                    let start = cmp::max(cur_range.start, left_range.start);
233                    let end = cmp::min(cur_range.end, left_range.end);
234                    assert!(start < end);
235                    let selected = selected_ranges.entry(commit_id).or_default();
236                    selected.push((start..end, right_range.clone()));
237                }
238            }
239        } else {
240            // In other cases, the hunk should be included in an annotation
241            // range to map it unambiguously. Skip any pre-overlapped ranges.
242            let skip = annotation_ranges
243                .iter()
244                .take_while(|(_, range)| range.end < left_range.end)
245                .count();
246            annotation_ranges = &annotation_ranges[skip..];
247            let Some((commit_id, cur_range)) = annotation_ranges.first() else {
248                continue;
249            };
250            let contained = cur_range.start <= left_range.start && left_range.end <= cur_range.end;
251            // If the hunk is pure insertion, it can be mapped to two distinct
252            // annotation ranges, which is ambiguous.
253            let ambiguous = cur_range.end == left_range.start
254                && annotation_ranges
255                    .get(1)
256                    .is_some_and(|(_, next_range)| next_range.start == left_range.end);
257            if contained && !ambiguous {
258                let selected = selected_ranges.entry(commit_id).or_default();
259                selected.push((left_range.clone(), right_range.clone()));
260            }
261        }
262    }
263    selected_ranges
264}
265
266/// Constructs new text by replacing `text1` range with `text2` range for each
267/// selected `(range1, range2)` pairs.
268fn combine_texts(text1: &[u8], text2: &[u8], selected_ranges: &[SelectedRange]) -> BString {
269    itertools::chain!(
270        [(0..0, 0..0)],
271        selected_ranges.iter().cloned(),
272        [(text1.len()..text1.len(), text2.len()..text2.len())],
273    )
274    .tuple_windows()
275    // Copy unchanged hunk from text1 and current hunk from text2
276    .map(|((prev1, _), (cur1, cur2))| (prev1.end..cur1.start, cur2))
277    .flat_map(|(range1, range2)| [&text1[range1], &text2[range2]])
278    .collect()
279}
280
281/// Describes changes made by [`absorb_hunks()`].
282#[derive(Clone, Debug)]
283pub struct AbsorbStats {
284    /// Rewritten source commit which the absorbed hunks were removed, or `None`
285    /// if the source commit was abandoned or no hunks were moved.
286    pub rewritten_source: Option<Commit>,
287    /// Rewritten commits which the source hunks were absorbed into, in forward
288    /// topological order.
289    pub rewritten_destinations: Vec<Commit>,
290    /// Number of descendant commits which were rebased. The number of rewritten
291    /// destination commits are not included.
292    pub num_rebased: usize,
293}
294
295/// Merges selected trees into the specified commits. Abandons the source commit
296/// if it becomes discardable.
297pub fn absorb_hunks(
298    repo: &mut MutableRepo,
299    source: &AbsorbSource,
300    mut selected_trees: HashMap<CommitId, MergedTreeBuilder>,
301) -> BackendResult<AbsorbStats> {
302    let mut rewritten_source = None;
303    let mut rewritten_destinations = Vec::new();
304    let mut num_rebased = 0;
305    let parents_label = conflict_label_for_commits(&source.parents);
306    let source_commit_label = source.commit.conflict_label();
307    // Rewrite commits in topological order so that descendant commits wouldn't
308    // be rewritten multiple times.
309    repo.transform_descendants(selected_trees.keys().cloned().collect(), async |rewriter| {
310        // Remove selected hunks from the source commit by reparent()
311        if rewriter.old_commit().id() == source.commit.id() {
312            let commit_builder = rewriter.reparent();
313            if commit_builder.is_discardable()? {
314                commit_builder.abandon();
315            } else {
316                rewritten_source = Some(commit_builder.write()?);
317                num_rebased += 1;
318            }
319            return Ok(());
320        }
321        let Some(tree_builder) = selected_trees.remove(rewriter.old_commit().id()) else {
322            rewriter.rebase().await?.write()?;
323            num_rebased += 1;
324            return Ok(());
325        };
326        // Merge hunks between source parent tree and selected tree
327        let selected_tree = tree_builder.write_tree()?;
328        let destination_label = rewriter.old_commit().conflict_label();
329        let commit_builder = rewriter.rebase().await?;
330        let destination_tree = commit_builder.tree();
331        let new_tree = MergedTree::merge(Merge::from_vec(vec![
332            (
333                destination_tree,
334                format!("{destination_label} (absorb destination)"),
335            ),
336            (
337                source.parent_tree.clone(),
338                format!("{parents_label} (parents of absorbed revision)"),
339            ),
340            (
341                selected_tree,
342                format!("absorbed changes (from {source_commit_label})"),
343            ),
344        ]))
345        .block_on()?;
346        let mut predecessors = commit_builder.predecessors().to_vec();
347        predecessors.push(source.commit.id().clone());
348        let new_commit = commit_builder
349            .set_tree(new_tree)
350            .set_predecessors(predecessors)
351            .write()?;
352        rewritten_destinations.push(new_commit);
353        Ok(())
354    })?;
355    Ok(AbsorbStats {
356        rewritten_source,
357        rewritten_destinations,
358        num_rebased,
359    })
360}
361
362fn to_file_value(value: MaterializedTreeValue) -> Result<Option<MaterializedFileValue>, String> {
363    match value {
364        MaterializedTreeValue::Absent => Ok(None), // New or deleted file
365        MaterializedTreeValue::AccessDenied(err) => Err(format!("Access is denied: {err}")),
366        MaterializedTreeValue::File(file) => Ok(Some(file)),
367        MaterializedTreeValue::Symlink { .. } => Err("Is a symlink".into()),
368        MaterializedTreeValue::FileConflict(_) | MaterializedTreeValue::OtherConflict { .. } => {
369            Err("Is a conflict".into())
370        }
371        MaterializedTreeValue::GitSubmodule(_) => Err("Is a Git submodule".into()),
372        MaterializedTreeValue::Tree(_) => panic!("diff should not contain trees"),
373    }
374}
375
376#[cfg(test)]
377mod tests {
378    use maplit::hashmap;
379
380    use super::*;
381
382    #[test]
383    fn test_split_file_hunks_empty_or_single_line() {
384        let commit_id1 = &CommitId::from_hex("111111");
385
386        // unchanged
387        assert_eq!(
388            split_file_hunks(&[], &ContentDiff::by_line(["", ""])),
389            hashmap! {}
390        );
391
392        // insert single line
393        assert_eq!(
394            split_file_hunks(&[], &ContentDiff::by_line(["", "2X\n"])),
395            hashmap! {}
396        );
397        // delete single line
398        assert_eq!(
399            split_file_hunks(&[(commit_id1, 0..3)], &ContentDiff::by_line(["1a\n", ""])),
400            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
401        );
402        // modify single line
403        assert_eq!(
404            split_file_hunks(
405                &[(commit_id1, 0..3)],
406                &ContentDiff::by_line(["1a\n", "1AA\n"])
407            ),
408            hashmap! { commit_id1 => vec![(0..3, 0..4)] }
409        );
410    }
411
412    #[test]
413    fn test_split_file_hunks_single_range() {
414        let commit_id1 = &CommitId::from_hex("111111");
415
416        // insert first, middle, and last lines
417        assert_eq!(
418            split_file_hunks(
419                &[(commit_id1, 0..6)],
420                &ContentDiff::by_line(["1a\n1b\n", "1X\n1a\n1Y\n1b\n1Z\n"])
421            ),
422            hashmap! {
423                commit_id1 => vec![(0..0, 0..3), (3..3, 6..9), (6..6, 12..15)],
424            }
425        );
426        // delete first, middle, and last lines
427        assert_eq!(
428            split_file_hunks(
429                &[(commit_id1, 0..15)],
430                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n1e\n1f\n", "1b\n1d\n1f\n"])
431            ),
432            hashmap! {
433                commit_id1 => vec![(0..3, 0..0), (6..9, 3..3), (12..15, 6..6)],
434            }
435        );
436        // modify non-contiguous lines
437        assert_eq!(
438            split_file_hunks(
439                &[(commit_id1, 0..12)],
440                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n", "1A\n1b\n1C\n1d\n"])
441            ),
442            hashmap! { commit_id1 => vec![(0..3, 0..3), (6..9, 6..9)] }
443        );
444    }
445
446    #[test]
447    fn test_split_file_hunks_contiguous_ranges_insert() {
448        let commit_id1 = &CommitId::from_hex("111111");
449        let commit_id2 = &CommitId::from_hex("222222");
450
451        // insert first line
452        assert_eq!(
453            split_file_hunks(
454                &[(commit_id1, 0..6), (commit_id2, 6..12)],
455                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1X\n1a\n1b\n2a\n2b\n"])
456            ),
457            hashmap! { commit_id1 => vec![(0..0, 0..3)] }
458        );
459        // insert middle line to first range
460        assert_eq!(
461            split_file_hunks(
462                &[(commit_id1, 0..6), (commit_id2, 6..12)],
463                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1X\n1b\n2a\n2b\n"])
464            ),
465            hashmap! { commit_id1 => vec![(3..3, 3..6)] }
466        );
467        // insert middle line between ranges (ambiguous)
468        assert_eq!(
469            split_file_hunks(
470                &[(commit_id1, 0..6), (commit_id2, 6..12)],
471                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n3X\n2a\n2b\n"])
472            ),
473            hashmap! {}
474        );
475        // insert middle line to second range
476        assert_eq!(
477            split_file_hunks(
478                &[(commit_id1, 0..6), (commit_id2, 6..12)],
479                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2X\n2b\n"])
480            ),
481            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
482        );
483        // insert last line
484        assert_eq!(
485            split_file_hunks(
486                &[(commit_id1, 0..6), (commit_id2, 6..12)],
487                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2b\n2X\n"])
488            ),
489            hashmap! { commit_id2 => vec![(12..12, 12..15)] }
490        );
491    }
492
493    #[test]
494    fn test_split_file_hunks_contiguous_ranges_delete() {
495        let commit_id1 = &CommitId::from_hex("111111");
496        let commit_id2 = &CommitId::from_hex("222222");
497
498        // delete first line
499        assert_eq!(
500            split_file_hunks(
501                &[(commit_id1, 0..6), (commit_id2, 6..12)],
502                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n2b\n"])
503            ),
504            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
505        );
506        // delete middle line from first range
507        assert_eq!(
508            split_file_hunks(
509                &[(commit_id1, 0..6), (commit_id2, 6..12)],
510                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2a\n2b\n"])
511            ),
512            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
513        );
514        // delete middle line from second range
515        assert_eq!(
516            split_file_hunks(
517                &[(commit_id1, 0..6), (commit_id2, 6..12)],
518                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2b\n"])
519            ),
520            hashmap! { commit_id2 => vec![(6..9, 6..6)] }
521        );
522        // delete last line
523        assert_eq!(
524            split_file_hunks(
525                &[(commit_id1, 0..6), (commit_id2, 6..12)],
526                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n"])
527            ),
528            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
529        );
530        // delete first and last lines
531        assert_eq!(
532            split_file_hunks(
533                &[(commit_id1, 0..6), (commit_id2, 6..12)],
534                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n"])
535            ),
536            hashmap! {
537                commit_id1 => vec![(0..3, 0..0)],
538                commit_id2 => vec![(9..12, 6..6)],
539            }
540        );
541
542        // delete across ranges (split first annotation range)
543        assert_eq!(
544            split_file_hunks(
545                &[(commit_id1, 0..6), (commit_id2, 6..12)],
546                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n"])
547            ),
548            hashmap! {
549                commit_id1 => vec![(3..6, 3..3)],
550                commit_id2 => vec![(6..12, 3..3)],
551            }
552        );
553        // delete middle lines across ranges (split both annotation ranges)
554        assert_eq!(
555            split_file_hunks(
556                &[(commit_id1, 0..6), (commit_id2, 6..12)],
557                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2b\n"])
558            ),
559            hashmap! {
560                commit_id1 => vec![(3..6, 3..3)],
561                commit_id2 => vec![(6..9, 3..3)],
562            }
563        );
564        // delete across ranges (split second annotation range)
565        assert_eq!(
566            split_file_hunks(
567                &[(commit_id1, 0..6), (commit_id2, 6..12)],
568                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "2b\n"])
569            ),
570            hashmap! {
571                commit_id1 => vec![(0..6, 0..0)],
572                commit_id2 => vec![(6..9, 0..0)],
573            }
574        );
575
576        // delete all
577        assert_eq!(
578            split_file_hunks(
579                &[(commit_id1, 0..6), (commit_id2, 6..12)],
580                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", ""])
581            ),
582            hashmap! {
583                commit_id1 => vec![(0..6, 0..0)],
584                commit_id2 => vec![(6..12, 0..0)],
585            }
586        );
587    }
588
589    #[test]
590    fn test_split_file_hunks_contiguous_ranges_modify() {
591        let commit_id1 = &CommitId::from_hex("111111");
592        let commit_id2 = &CommitId::from_hex("222222");
593
594        // modify first line
595        assert_eq!(
596            split_file_hunks(
597                &[(commit_id1, 0..6), (commit_id2, 6..12)],
598                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2b\n"])
599            ),
600            hashmap! { commit_id1 => vec![(0..3, 0..3)] }
601        );
602        // modify middle line of first range
603        assert_eq!(
604            split_file_hunks(
605                &[(commit_id1, 0..6), (commit_id2, 6..12)],
606                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2a\n2b\n"])
607            ),
608            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
609        );
610        // modify middle lines of both ranges (ambiguous)
611        // ('hg absorb' accepts this)
612        assert_eq!(
613            split_file_hunks(
614                &[(commit_id1, 0..6), (commit_id2, 6..12)],
615                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2A\n2b\n"])
616            ),
617            hashmap! {}
618        );
619        // modify middle line of second range
620        assert_eq!(
621            split_file_hunks(
622                &[(commit_id1, 0..6), (commit_id2, 6..12)],
623                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2b\n"])
624            ),
625            hashmap! { commit_id2 => vec![(6..9, 6..9)] }
626        );
627        // modify last line
628        assert_eq!(
629            split_file_hunks(
630                &[(commit_id1, 0..6), (commit_id2, 6..12)],
631                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2B\n"])
632            ),
633            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
634        );
635        // modify first and last lines
636        assert_eq!(
637            split_file_hunks(
638                &[(commit_id1, 0..6), (commit_id2, 6..12)],
639                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2B\n"])
640            ),
641            hashmap! {
642                commit_id1 => vec![(0..3, 0..3)],
643                commit_id2 => vec![(9..12, 9..12)],
644            }
645        );
646    }
647
648    #[test]
649    fn test_split_file_hunks_contiguous_ranges_modify_insert() {
650        let commit_id1 = &CommitId::from_hex("111111");
651        let commit_id2 = &CommitId::from_hex("222222");
652
653        // modify first range, insert adjacent middle line
654        assert_eq!(
655            split_file_hunks(
656                &[(commit_id1, 0..6), (commit_id2, 6..12)],
657                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1B\n1X\n2a\n2b\n"])
658            ),
659            hashmap! { commit_id1 => vec![(0..6, 0..9)] }
660        );
661        // modify second range, insert adjacent middle line
662        assert_eq!(
663            split_file_hunks(
664                &[(commit_id1, 0..6), (commit_id2, 6..12)],
665                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2X\n2A\n2B\n"])
666            ),
667            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
668        );
669        // modify second range, insert last line
670        assert_eq!(
671            split_file_hunks(
672                &[(commit_id1, 0..6), (commit_id2, 6..12)],
673                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2B\n2X\n"])
674            ),
675            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
676        );
677        // modify first and last lines (unambiguous), insert middle line between
678        // ranges (ambiguous)
679        assert_eq!(
680            split_file_hunks(
681                &[(commit_id1, 0..6), (commit_id2, 6..12)],
682                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n3X\n2a\n2B\n"])
683            ),
684            hashmap! {
685                commit_id1 => vec![(0..3, 0..3)],
686                commit_id2 => vec![(9..12, 12..15)],
687            }
688        );
689    }
690
691    #[test]
692    fn test_split_file_hunks_contiguous_ranges_modify_delete() {
693        let commit_id1 = &CommitId::from_hex("111111");
694        let commit_id2 = &CommitId::from_hex("222222");
695
696        // modify first line, delete adjacent middle line
697        assert_eq!(
698            split_file_hunks(
699                &[(commit_id1, 0..6), (commit_id2, 6..12)],
700                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2b\n"])
701            ),
702            hashmap! { commit_id1 => vec![(0..6, 0..3)] }
703        );
704        // modify last line, delete adjacent middle line
705        assert_eq!(
706            split_file_hunks(
707                &[(commit_id1, 0..6), (commit_id2, 6..12)],
708                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2B\n"])
709            ),
710            hashmap! { commit_id2 => vec![(6..12, 6..9)] }
711        );
712        // modify first and last lines, delete middle line from first range
713        assert_eq!(
714            split_file_hunks(
715                &[(commit_id1, 0..6), (commit_id2, 6..12)],
716                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2B\n"])
717            ),
718            hashmap! {
719                commit_id1 => vec![(0..6, 0..3)],
720                commit_id2 => vec![(9..12, 6..9)],
721            }
722        );
723        // modify first and last lines, delete middle line from second range
724        assert_eq!(
725            split_file_hunks(
726                &[(commit_id1, 0..6), (commit_id2, 6..12)],
727                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2B\n"])
728            ),
729            hashmap! {
730                commit_id1 => vec![(0..3, 0..3)],
731                commit_id2 => vec![(6..12, 6..9)],
732            }
733        );
734        // modify middle line, delete adjacent middle line (ambiguous)
735        assert_eq!(
736            split_file_hunks(
737                &[(commit_id1, 0..6), (commit_id2, 6..12)],
738                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2b\n"])
739            ),
740            hashmap! {}
741        );
742    }
743
744    #[test]
745    fn test_split_file_hunks_non_contiguous_ranges_insert() {
746        let commit_id1 = &CommitId::from_hex("111111");
747        let commit_id2 = &CommitId::from_hex("222222");
748
749        // insert middle line to first range
750        assert_eq!(
751            split_file_hunks(
752                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
753                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2a\n2b\n"])
754            ),
755            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
756        );
757        // insert middle line to second range
758        assert_eq!(
759            split_file_hunks(
760                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
761                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2X\n2a\n2b\n"])
762            ),
763            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
764        );
765        // insert middle lines to both ranges
766        assert_eq!(
767            split_file_hunks(
768                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
769                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2X\n2a\n2b\n"])
770            ),
771            hashmap! {
772                commit_id1 => vec![(6..6, 6..9)],
773                commit_id2 => vec![(9..9, 12..15)],
774            }
775        );
776    }
777
778    #[test]
    // Same gapped fixture as the plain insert test (bytes 6..9, the `0a\n`
    // line, are listed for neither commit), but each right text also changes
    // that unlisted line to `0A`. All three cases expect an empty map, i.e.
    // no hunk is assigned to any commit.
779    fn test_split_file_hunks_non_contiguous_ranges_insert_modify_masked() {
780        let commit_id1 = &CommitId::from_hex("111111");
781        let commit_id2 = &CommitId::from_hex("222222");
782
783        // insert middle line to first range, modify masked line (ambiguous)
784        assert_eq!(
785            split_file_hunks(
786                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
787                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2a\n2b\n"])
788            ),
789            hashmap! {}
790        );
791        // insert middle line to second range, modify masked line (ambiguous)
792        assert_eq!(
793            split_file_hunks(
794                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
795                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2X\n2a\n2b\n"])
796            ),
797            hashmap! {}
798        );
799        // insert middle lines to both ranges, modify masked line (ambiguous)
800        assert_eq!(
801            split_file_hunks(
802                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
803                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2X\n2a\n2b\n"])
804            ),
805            hashmap! {}
806        );
807    }
808
809    #[test]
    // Exercises `split_file_hunks` with a gap in the annotation ranges (bytes
    // 6..9, the `0a\n` line, are listed for neither commit) when lines next
    // to the gap are deleted and the gap line itself is kept. Expected values
    // are `(left byte range, right byte range)` pairs, so a pure deletion has
    // an empty right range.
810    fn test_split_file_hunks_non_contiguous_ranges_delete() {
811        let commit_id1 = &CommitId::from_hex("111111");
812        let commit_id2 = &CommitId::from_hex("222222");
813
814        // delete middle line from first range
815        assert_eq!(
816            split_file_hunks(
817                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
818                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2a\n2b\n"])
819            ),
820            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
821        );
822        // delete middle line from second range
823        assert_eq!(
824            split_file_hunks(
825                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
826                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2b\n"])
827            ),
828            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
829        );
830        // delete middle lines from both ranges
        // (the right-hand offset of the second hunk is 6 because `1b\n` is
        // already missing from the right text)
831        assert_eq!(
832            split_file_hunks(
833                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
834                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2b\n"])
835            ),
836            hashmap! {
837                commit_id1 => vec![(3..6, 3..3)],
838                commit_id2 => vec![(9..12, 6..6)],
839            }
840        );
841    }
842
843    #[test]
    // Same gapped fixture as the plain delete test, but each right text also
    // changes the unlisted `0a` line (bytes 6..9 of the left text) to `0A`.
    // All three cases expect an empty map, i.e. no hunk is assigned to any
    // commit.
844    fn test_split_file_hunks_non_contiguous_ranges_delete_modify_masked() {
845        let commit_id1 = &CommitId::from_hex("111111");
846        let commit_id2 = &CommitId::from_hex("222222");
847
848        // delete middle line from first range, modify masked line (ambiguous)
849        assert_eq!(
850            split_file_hunks(
851                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
852                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2a\n2b\n"])
853            ),
854            hashmap! {}
855        );
856        // delete middle line from second range, modify masked line (ambiguous)
857        assert_eq!(
858            split_file_hunks(
859                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
860                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2b\n"])
861            ),
862            hashmap! {}
863        );
864        // delete middle lines from both ranges, modify masked line (ambiguous)
865        assert_eq!(
866            split_file_hunks(
867                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
868                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2b\n"])
869            ),
870            hashmap! {}
871        );
872    }
873
874    #[test]
    // Gapped fixture (bytes 6..9, the `0a\n` line, are listed for neither
    // commit) where each right text deletes the unlisted line together with
    // an adjacent listed line. All three cases expect an empty map.
875    fn test_split_file_hunks_non_contiguous_ranges_delete_delete_masked() {
876        let commit_id1 = &CommitId::from_hex("111111");
877        let commit_id2 = &CommitId::from_hex("222222");
878
879        // 'hg absorb' accepts these, but it seems better to reject them as
880        // ambiguous. Masked lines cannot be deleted.
881
882        // delete middle line from first range, delete masked line (ambiguous)
883        assert_eq!(
884            split_file_hunks(
885                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
886                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2a\n2b\n"])
887            ),
888            hashmap! {}
889        );
890        // delete middle line from second range, delete masked line (ambiguous)
891        assert_eq!(
892            split_file_hunks(
893                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
894                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n2b\n"])
895            ),
896            hashmap! {}
897        );
898        // delete middle lines from both ranges, delete masked line (ambiguous)
899        assert_eq!(
900            split_file_hunks(
901                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
902                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2b\n"])
903            ),
904            hashmap! {}
905        );
906    }
907
908    #[test]
    // Exercises `split_file_hunks` with a gap in the annotation ranges (bytes
    // 6..9, the `0a\n` line, are listed for neither commit) when a listed
    // line is rewritten in place and the gap line is unchanged. Each modified
    // line is expected under the commit whose range contains it, as a
    // `(left byte range, right byte range)` pair of equal length.
909    fn test_split_file_hunks_non_contiguous_ranges_modify() {
910        let commit_id1 = &CommitId::from_hex("111111");
911        let commit_id2 = &CommitId::from_hex("222222");
912
913        // modify middle line of first range
914        assert_eq!(
915            split_file_hunks(
916                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
917                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2a\n2b\n"])
918            ),
919            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
920        );
921        // modify middle line of second range
922        assert_eq!(
923            split_file_hunks(
924                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
925                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2A\n2b\n"])
926            ),
927            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
928        );
929        // modify middle lines of both ranges
930        assert_eq!(
931            split_file_hunks(
932                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
933                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2A\n2b\n"])
934            ),
935            hashmap! {
936                commit_id1 => vec![(3..6, 3..6)],
937                commit_id2 => vec![(9..12, 9..12)],
938            }
939        );
940    }
941
942    #[test]
    // Same gapped fixture as the plain modify test, but each right text also
    // changes the unlisted `0a` line (bytes 6..9 of the left text) to `0A`,
    // right next to a modified listed line. All three cases expect an empty
    // map, i.e. no hunk is assigned to any commit.
943    fn test_split_file_hunks_non_contiguous_ranges_modify_modify_masked() {
944        let commit_id1 = &CommitId::from_hex("111111");
945        let commit_id2 = &CommitId::from_hex("222222");
946
947        // modify middle line of first range, modify masked line (ambiguous)
948        assert_eq!(
949            split_file_hunks(
950                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
951                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2a\n2b\n"])
952            ),
953            hashmap! {}
954        );
955        // modify middle line of second range, modify masked line (ambiguous)
956        assert_eq!(
957            split_file_hunks(
958                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
959                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2A\n2b\n"])
960            ),
961            hashmap! {}
962        );
963        // modify middle lines of both ranges, modify masked line (ambiguous)
964        assert_eq!(
965            split_file_hunks(
966                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
967                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2A\n2b\n"])
968            ),
969            hashmap! {}
970        );
971    }
972
973    #[test]
    // Exercises `split_file_hunks` when the unlisted bytes are at the end of
    // the file: only 0..6 is listed (for `commit_id1`), and the trailing
    // `0a\n` line at 6..9 is listed for no commit. A line inserted at the end
    // of the listed range is expected under `commit_id1` as a
    // `(left byte range, right byte range)` pair with an empty left range.
974    fn test_split_file_hunks_non_contiguous_tail_range_insert() {
975        let commit_id1 = &CommitId::from_hex("111111");
976
977        // insert middle line to range
978        assert_eq!(
979            split_file_hunks(
980                &[(commit_id1, 0..6) /* , 6..9 */],
981                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0a\n"])
982            ),
983            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
984        );
985    }
986
987    #[test]
    // Tail-gap fixture (only 0..6 is listed; the trailing `0a\n` at 6..9 is
    // not) where the right text inserts a line and also changes the unlisted
    // line to `0A`. The expected result is an empty map.
988    fn test_split_file_hunks_non_contiguous_tail_range_insert_modify_masked() {
989        let commit_id1 = &CommitId::from_hex("111111");
990
991        // insert middle line to range, modify masked line (ambiguous)
992        assert_eq!(
993            split_file_hunks(
994                &[(commit_id1, 0..6) /* , 6..9 */],
995                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0A\n"])
996            ),
997            hashmap! {}
998        );
999    }
1000
1001    #[test]
    // Tail-gap fixture (only 0..6 is listed, for `commit_id1`; the trailing
    // `0a\n` at 6..9 is not) where listed lines are deleted and the unlisted
    // line is kept. Deletions are expected under `commit_id1` as
    // `(left byte range, right byte range)` pairs with an empty right range.
1002    fn test_split_file_hunks_non_contiguous_tail_range_delete() {
1003        let commit_id1 = &CommitId::from_hex("111111");
1004
1005        // delete middle line from range
1006        assert_eq!(
1007            split_file_hunks(
1008                &[(commit_id1, 0..6) /* , 6..9 */],
1009                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0a\n"])
1010            ),
1011            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
1012        );
1013        // delete all lines from range
1014        assert_eq!(
1015            split_file_hunks(
1016                &[(commit_id1, 0..6) /* , 6..9 */],
1017                &ContentDiff::by_line(["1a\n1b\n0a\n", "0a\n"])
1018            ),
1019            hashmap! { commit_id1 => vec![(0..6, 0..0)] }
1020        );
1021    }
1022
1023    #[test]
    // Tail-gap fixture (only 0..6 is listed; the trailing `0a\n` at 6..9 is
    // not) where the right text deletes listed lines and also changes the
    // unlisted line to `0A`. Both cases expect an empty map.
1024    fn test_split_file_hunks_non_contiguous_tail_range_delete_modify_masked() {
1025        let commit_id1 = &CommitId::from_hex("111111");
1026
1027        // delete middle line from range, modify masked line (ambiguous)
1028        assert_eq!(
1029            split_file_hunks(
1030                &[(commit_id1, 0..6) /* , 6..9 */],
1031                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0A\n"])
1032            ),
1033            hashmap! {}
1034        );
1035        // delete all lines from range, modify masked line (ambiguous)
1036        assert_eq!(
1037            split_file_hunks(
1038                &[(commit_id1, 0..6) /* , 6..9 */],
1039                &ContentDiff::by_line(["1a\n1b\n0a\n", "0A\n"])
1040            ),
1041            hashmap! {}
1042        );
1043    }
1044
1045    #[test]
    // Tail-gap fixture (only 0..6 is listed; the trailing `0a\n` at 6..9 is
    // not) where the right text deletes the unlisted line together with
    // listed lines, including the case where the right text is empty. Both
    // cases expect an empty map.
1046    fn test_split_file_hunks_non_contiguous_tail_range_delete_delete_masked() {
1047        let commit_id1 = &CommitId::from_hex("111111");
1048
1049        // 'hg absorb' accepts these, but it seems better to reject them as
1050        // ambiguous. Masked lines cannot be deleted.
1051
1052        // delete middle line from range, delete masked line (ambiguous)
1053        assert_eq!(
1054            split_file_hunks(
1055                &[(commit_id1, 0..6) /* , 6..9 */],
1056                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n"])
1057            ),
1058            hashmap! {}
1059        );
1060        // delete all lines from range, delete masked line (ambiguous)
1061        assert_eq!(
1062            split_file_hunks(
1063                &[(commit_id1, 0..6) /* , 6..9 */],
1064                &ContentDiff::by_line(["1a\n1b\n0a\n", ""])
1065            ),
1066            hashmap! {}
1067        );
1068    }
1069
1070    #[test]
    // Tail-gap fixture (only 0..6 is listed, for `commit_id1`; the trailing
    // `0a\n` at 6..9 is not) where a listed line is rewritten in place and
    // the unlisted line is unchanged. The modified line is expected under
    // `commit_id1` as a `(left byte range, right byte range)` pair.
1071    fn test_split_file_hunks_non_contiguous_tail_range_modify() {
1072        let commit_id1 = &CommitId::from_hex("111111");
1073
1074        // modify middle line of range
1075        assert_eq!(
1076            split_file_hunks(
1077                &[(commit_id1, 0..6) /* , 6..9 */],
1078                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0a\n"])
1079            ),
1080            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
1081        );
1082    }
1083
1084    #[test]
    // Tail-gap fixture (only 0..6 is listed; the trailing `0a\n` at 6..9 is
    // not) where the right text rewrites a listed line and also changes the
    // adjacent unlisted line to `0A`. The expected result is an empty map.
1085    fn test_split_file_hunks_non_contiguous_tail_range_modify_modify_masked() {
1086        let commit_id1 = &CommitId::from_hex("111111");
1087
1088        // modify middle line of range, modify masked line (ambiguous)
1089        assert_eq!(
1090            split_file_hunks(
1091                &[(commit_id1, 0..6) /* , 6..9 */],
1092                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0A\n"])
1093            ),
1094            hashmap! {}
1095        );
1096    }
1097
1098    #[test]
    // Exercises `split_file_hunks` with several edits in one diff. The
    // annotation ranges cover the whole left text and alternate between three
    // commits; `commit_id1` owns two separate ranges. The expected map holds
    // two `(left byte range, right byte range)` hunks each for `commit_id1`
    // and `commit_id3`. It has no entry for `commit_id2`, whose only line
    // (`2a`) is unchanged.
1099    fn test_split_file_hunks_multiple_edits() {
1100        let commit_id1 = &CommitId::from_hex("111111");
1101        let commit_id2 = &CommitId::from_hex("222222");
1102        let commit_id3 = &CommitId::from_hex("333333");
1103
1104        assert_eq!(
1105            split_file_hunks(
1106                &[
1107                    (commit_id1, 0..3),   // 1a       => 1A
1108                    (commit_id2, 3..6),   // 2a       => 2a
1109                    (commit_id1, 6..15),  // 1b 1c 1d => 1B 1d
1110                    (commit_id3, 15..21), // 3a 3b    => 3X 3A 3b 3Y
1111                ],
1112                &ContentDiff::by_line([
1113                    "1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1114                    "1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n"
1115                ])
1116            ),
1117            hashmap! {
1118                commit_id1 => vec![(0..3, 0..3), (6..12, 6..9)],
1119                commit_id3 => vec![(15..18, 12..18), (21..21, 21..24)],
1120            }
1121        );
1122    }
1123
1124    #[test]
    // Exercises `combine_texts(text1, text2, selected_ranges)`. As the
    // expectations below show, an empty selection yields the first text
    // unchanged, and each selected `(range in text1, range in text2)` pair
    // replaces those bytes of the first text with the bytes from the second.
1125    fn test_combine_texts() {
1126        assert_eq!(combine_texts(b"", b"", &[]), "");
1127        assert_eq!(combine_texts(b"foo", b"bar", &[]), "foo");
1128        assert_eq!(combine_texts(b"foo", b"bar", &[(0..3, 0..3)]), "bar");
1129
        // The two cases below reuse the texts and the per-commit hunks from
        // `test_split_file_hunks_multiple_edits`: first only the `commit_id1`
        // hunks are applied, then only the `commit_id3` hunks.
1130        assert_eq!(
1131            combine_texts(
1132                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1133                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1134                &[(0..3, 0..3), (6..12, 6..9)]
1135            ),
1136            "1A\n2a\n1B\n1d\n3a\n3b\n"
1137        );
1138        assert_eq!(
1139            combine_texts(
1140                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1141                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1142                &[(15..18, 12..18), (21..21, 21..24)]
1143            ),
1144            "1a\n2a\n1b\n1c\n1d\n3X\n3A\n3b\n3Y\n"
1145        );
1146    }
1147}