jj_lib/
absorb.rs

1// Copyright 2024 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Algorithm to split changes in a single source commit into its most relevant
16//! ancestors, 'absorbing' them away.
17
18use std::cmp;
19use std::collections::HashMap;
20use std::ops::Range;
21use std::rc::Rc;
22
23use bstr::BString;
24use futures::StreamExt as _;
25use itertools::Itertools as _;
26use thiserror::Error;
27
28use crate::annotate::get_annotation_with_file_content;
29use crate::backend::BackendError;
30use crate::backend::BackendResult;
31use crate::backend::CommitId;
32use crate::backend::TreeValue;
33use crate::commit::Commit;
34use crate::conflicts::materialized_diff_stream;
35use crate::conflicts::MaterializedFileValue;
36use crate::conflicts::MaterializedTreeValue;
37use crate::copies::CopyRecords;
38use crate::diff::Diff;
39use crate::diff::DiffHunkKind;
40use crate::matchers::Matcher;
41use crate::merge::Merge;
42use crate::merged_tree::MergedTree;
43use crate::merged_tree::MergedTreeBuilder;
44use crate::repo::MutableRepo;
45use crate::repo::Repo;
46use crate::repo_path::RepoPathBuf;
47use crate::revset::ResolvedRevsetExpression;
48use crate::revset::RevsetEvaluationError;
49
50/// The source commit to absorb into its ancestry.
51#[derive(Clone, Debug)]
52pub struct AbsorbSource {
53    commit: Commit,
54    parent_tree: MergedTree,
55}
56
57impl AbsorbSource {
58    /// Create an absorb source from a single commit.
59    pub fn from_commit(repo: &dyn Repo, commit: Commit) -> BackendResult<Self> {
60        let parent_tree = commit.parent_tree(repo)?;
61        Ok(AbsorbSource {
62            commit,
63            parent_tree,
64        })
65    }
66}
67
/// Error splitting an absorb source into modified ancestry trees.
///
/// Both variants are transparent wrappers: their `Display` output and error
/// source are those of the wrapped error, and `#[from]` lets `?` convert the
/// underlying error types into this enum.
#[derive(Debug, Error)]
pub enum AbsorbError {
    /// Error while contacting the Backend.
    #[error(transparent)]
    Backend(#[from] BackendError),
    /// Error resolving commit ancestry.
    #[error(transparent)]
    RevsetEvaluation(#[from] RevsetEvaluationError),
}
78
/// An absorb 'plan' indicating which commits should be modified and what they
/// should be modified to.
///
/// The default value is an empty plan: no target commits and no skipped paths.
#[derive(Default)]
pub struct SelectedTrees {
    /// Commits to be modified, to be passed to `absorb_hunks`. Each entry maps
    /// the id of a destination commit to the builder of the tree holding the
    /// hunks selected for that commit.
    pub target_commits: HashMap<CommitId, MergedTreeBuilder>,
    /// Paths that were not absorbed for various error reasons, each paired
    /// with a human-readable description of the reason.
    pub skipped_paths: Vec<(RepoPathBuf, String)>,
}
88
89/// Builds trees to be merged into destination commits by splitting source
90/// changes based on file annotation.
91pub async fn split_hunks_to_trees(
92    repo: &dyn Repo,
93    source: &AbsorbSource,
94    destinations: &Rc<ResolvedRevsetExpression>,
95    matcher: &dyn Matcher,
96) -> Result<SelectedTrees, AbsorbError> {
97    let mut selected_trees = SelectedTrees::default();
98
99    let left_tree = &source.parent_tree;
100    let right_tree = source.commit.tree()?;
101    // TODO: enable copy tracking if we add support for annotate and merge
102    let copy_records = CopyRecords::default();
103    let tree_diff = left_tree.diff_stream_with_copies(&right_tree, matcher, &copy_records);
104    let mut diff_stream = materialized_diff_stream(repo.store(), tree_diff);
105    while let Some(entry) = diff_stream.next().await {
106        let left_path = entry.path.source();
107        let right_path = entry.path.target();
108        let (left_value, right_value) = entry.values?;
109        let (left_text, executable) = match to_file_value(left_value) {
110            Ok(Some(mut value)) => (value.read_all(left_path)?, value.executable),
111            // New file should have no destinations
112            Ok(None) => continue,
113            Err(reason) => {
114                selected_trees
115                    .skipped_paths
116                    .push((left_path.to_owned(), reason));
117                continue;
118            }
119        };
120        let right_text = match to_file_value(right_value) {
121            Ok(Some(mut value)) => value.read_all(right_path)?,
122            // Deleted file could be absorbed, but that would require special
123            // handling to propagate deletion of the tree entry
124            Ok(None) => {
125                let reason = "Deleted file".to_owned();
126                selected_trees
127                    .skipped_paths
128                    .push((right_path.to_owned(), reason));
129                continue;
130            }
131            Err(reason) => {
132                selected_trees
133                    .skipped_paths
134                    .push((right_path.to_owned(), reason));
135                continue;
136            }
137        };
138
139        // Compute annotation of parent (= left) content to map right hunks
140        let annotation = get_annotation_with_file_content(
141            repo,
142            source.commit.id(),
143            destinations,
144            left_path,
145            left_text.clone(),
146        )?;
147        let annotation_ranges = annotation
148            .compact_line_ranges()
149            .filter_map(|(commit_id, range)| Some((commit_id.ok()?, range)))
150            .collect_vec();
151        let diff = Diff::by_line([&left_text, &right_text]);
152        let selected_ranges = split_file_hunks(&annotation_ranges, &diff);
153        // Build trees containing parent (= left) contents + selected hunks
154        for (&commit_id, ranges) in &selected_ranges {
155            let tree_builder = selected_trees
156                .target_commits
157                .entry(commit_id.clone())
158                .or_insert_with(|| MergedTreeBuilder::new(left_tree.id().clone()));
159            let new_text = combine_texts(&left_text, &right_text, ranges);
160            let id = repo
161                .store()
162                .write_file(left_path, &mut new_text.as_slice())
163                .await?;
164            tree_builder.set_or_remove(
165                left_path.to_owned(),
166                Merge::normal(TreeValue::File { id, executable }),
167            );
168        }
169    }
170
171    Ok(selected_trees)
172}
173
/// A selected hunk as a `(left, right)` pair of byte ranges: the span of the
/// left text to be replaced, and the span of the right text replacing it.
type SelectedRange = (Range<usize>, Range<usize>);
175
176/// Maps `diff` hunks to commits based on the left `annotation_ranges`. The
177/// `annotation_ranges` should be compacted.
178fn split_file_hunks<'a>(
179    mut annotation_ranges: &[(&'a CommitId, Range<usize>)],
180    diff: &Diff,
181) -> HashMap<&'a CommitId, Vec<SelectedRange>> {
182    debug_assert!(annotation_ranges.iter().all(|(_, range)| !range.is_empty()));
183    let mut selected_ranges: HashMap<&CommitId, Vec<_>> = HashMap::new();
184    let mut diff_hunk_ranges = diff
185        .hunk_ranges()
186        .filter(|hunk| hunk.kind == DiffHunkKind::Different);
187    while !annotation_ranges.is_empty() {
188        let Some(hunk) = diff_hunk_ranges.next() else {
189            break;
190        };
191        let [left_range, right_range]: &[_; 2] = hunk.ranges[..].try_into().unwrap();
192        assert!(!left_range.is_empty() || !right_range.is_empty());
193        if right_range.is_empty() {
194            // If the hunk is pure deletion, it can be mapped to multiple
195            // overlapped annotation ranges unambiguously.
196            let skip = annotation_ranges
197                .iter()
198                .take_while(|(_, range)| range.end <= left_range.start)
199                .count();
200            annotation_ranges = &annotation_ranges[skip..];
201            let pre_overlap = annotation_ranges
202                .iter()
203                .take_while(|(_, range)| range.end < left_range.end)
204                .count();
205            let maybe_overlapped_ranges = annotation_ranges.get(..pre_overlap + 1);
206            annotation_ranges = &annotation_ranges[pre_overlap..];
207            let Some(overlapped_ranges) = maybe_overlapped_ranges else {
208                continue;
209            };
210            // Ensure that the ranges are contiguous and include the start.
211            let all_covered = overlapped_ranges
212                .iter()
213                .try_fold(left_range.start, |prev_end, (_, cur)| {
214                    (cur.start <= prev_end).then_some(cur.end)
215                })
216                .inspect(|&last_end| assert!(left_range.end <= last_end))
217                .is_some();
218            if all_covered {
219                for (commit_id, cur_range) in overlapped_ranges {
220                    let start = cmp::max(cur_range.start, left_range.start);
221                    let end = cmp::min(cur_range.end, left_range.end);
222                    assert!(start < end);
223                    let selected = selected_ranges.entry(commit_id).or_default();
224                    selected.push((start..end, right_range.clone()));
225                }
226            }
227        } else {
228            // In other cases, the hunk should be included in an annotation
229            // range to map it unambiguously. Skip any pre-overlapped ranges.
230            let skip = annotation_ranges
231                .iter()
232                .take_while(|(_, range)| range.end < left_range.end)
233                .count();
234            annotation_ranges = &annotation_ranges[skip..];
235            let Some((commit_id, cur_range)) = annotation_ranges.first() else {
236                continue;
237            };
238            let contained = cur_range.start <= left_range.start && left_range.end <= cur_range.end;
239            // If the hunk is pure insertion, it can be mapped to two distinct
240            // annotation ranges, which is ambiguous.
241            let ambiguous = cur_range.end == left_range.start
242                && annotation_ranges
243                    .get(1)
244                    .is_some_and(|(_, next_range)| next_range.start == left_range.end);
245            if contained && !ambiguous {
246                let selected = selected_ranges.entry(commit_id).or_default();
247                selected.push((left_range.clone(), right_range.clone()));
248            }
249        }
250    }
251    selected_ranges
252}
253
254/// Constructs new text by replacing `text1` range with `text2` range for each
255/// selected `(range1, range2)` pairs.
256fn combine_texts(text1: &[u8], text2: &[u8], selected_ranges: &[SelectedRange]) -> BString {
257    itertools::chain!(
258        [(0..0, 0..0)],
259        selected_ranges.iter().cloned(),
260        [(text1.len()..text1.len(), text2.len()..text2.len())],
261    )
262    .tuple_windows()
263    // Copy unchanged hunk from text1 and current hunk from text2
264    .map(|((prev1, _), (cur1, cur2))| (prev1.end..cur1.start, cur2))
265    .flat_map(|(range1, range2)| [&text1[range1], &text2[range2]])
266    .collect()
267}
268
/// Describes changes made by [`absorb_hunks()`].
#[derive(Clone, Debug)]
pub struct AbsorbStats {
    /// Rewritten source commit from which the absorbed hunks were removed, or
    /// `None` if the source commit was abandoned or no hunks were moved.
    pub rewritten_source: Option<Commit>,
    /// Rewritten commits which the source hunks were absorbed into, in forward
    /// topological order.
    pub rewritten_destinations: Vec<Commit>,
    /// Number of descendant commits which were rebased. The rewritten
    /// destination commits are not included in this number.
    pub num_rebased: usize,
}
282
283/// Merges selected trees into the specified commits. Abandons the source commit
284/// if it becomes discardable.
285pub fn absorb_hunks(
286    repo: &mut MutableRepo,
287    source: &AbsorbSource,
288    mut selected_trees: HashMap<CommitId, MergedTreeBuilder>,
289) -> BackendResult<AbsorbStats> {
290    let store = repo.store().clone();
291    let mut rewritten_source = None;
292    let mut rewritten_destinations = Vec::new();
293    let mut num_rebased = 0;
294    // Rewrite commits in topological order so that descendant commits wouldn't
295    // be rewritten multiple times.
296    repo.transform_descendants(selected_trees.keys().cloned().collect(), |rewriter| {
297        // Remove selected hunks from the source commit by reparent()
298        if rewriter.old_commit().id() == source.commit.id() {
299            let commit_builder = rewriter.reparent();
300            if commit_builder.is_discardable()? {
301                commit_builder.abandon();
302            } else {
303                rewritten_source = Some(commit_builder.write()?);
304                num_rebased += 1;
305            }
306            return Ok(());
307        }
308        let Some(tree_builder) = selected_trees.remove(rewriter.old_commit().id()) else {
309            rewriter.rebase()?.write()?;
310            num_rebased += 1;
311            return Ok(());
312        };
313        // Merge hunks between source parent tree and selected tree
314        let selected_tree_id = tree_builder.write_tree(&store)?;
315        let commit_builder = rewriter.rebase()?;
316        let destination_tree = store.get_root_tree(commit_builder.tree_id())?;
317        let selected_tree = store.get_root_tree(&selected_tree_id)?;
318        let new_tree = destination_tree.merge(&source.parent_tree, &selected_tree)?;
319        let mut predecessors = commit_builder.predecessors().to_vec();
320        predecessors.push(source.commit.id().clone());
321        let new_commit = commit_builder
322            .set_tree_id(new_tree.id())
323            .set_predecessors(predecessors)
324            .write()?;
325        rewritten_destinations.push(new_commit);
326        Ok(())
327    })?;
328    Ok(AbsorbStats {
329        rewritten_source,
330        rewritten_destinations,
331        num_rebased,
332    })
333}
334
335fn to_file_value(value: MaterializedTreeValue) -> Result<Option<MaterializedFileValue>, String> {
336    match value {
337        MaterializedTreeValue::Absent => Ok(None), // New or deleted file
338        MaterializedTreeValue::AccessDenied(err) => Err(format!("Access is denied: {err}")),
339        MaterializedTreeValue::File(file) => Ok(Some(file)),
340        MaterializedTreeValue::Symlink { .. } => Err("Is a symlink".into()),
341        MaterializedTreeValue::FileConflict { .. }
342        | MaterializedTreeValue::OtherConflict { .. } => Err("Is a conflict".into()),
343        MaterializedTreeValue::GitSubmodule(_) => Err("Is a Git submodule".into()),
344        MaterializedTreeValue::Tree(_) => panic!("diff should not contain trees"),
345    }
346}
347
348#[cfg(test)]
349mod tests {
350    use maplit::hashmap;
351
352    use super::*;
353
354    #[test]
355    fn test_split_file_hunks_empty_or_single_line() {
356        let commit_id1 = &CommitId::from_hex("111111");
357
358        // unchanged
359        assert_eq!(split_file_hunks(&[], &Diff::by_line(["", ""])), hashmap! {});
360
361        // insert single line
362        assert_eq!(
363            split_file_hunks(&[], &Diff::by_line(["", "2X\n"])),
364            hashmap! {}
365        );
366        // delete single line
367        assert_eq!(
368            split_file_hunks(&[(commit_id1, 0..3)], &Diff::by_line(["1a\n", ""])),
369            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
370        );
371        // modify single line
372        assert_eq!(
373            split_file_hunks(&[(commit_id1, 0..3)], &Diff::by_line(["1a\n", "1AA\n"])),
374            hashmap! { commit_id1 => vec![(0..3, 0..4)] }
375        );
376    }
377
378    #[test]
379    fn test_split_file_hunks_single_range() {
380        let commit_id1 = &CommitId::from_hex("111111");
381
382        // insert first, middle, and last lines
383        assert_eq!(
384            split_file_hunks(
385                &[(commit_id1, 0..6)],
386                &Diff::by_line(["1a\n1b\n", "1X\n1a\n1Y\n1b\n1Z\n"])
387            ),
388            hashmap! {
389                commit_id1 => vec![(0..0, 0..3), (3..3, 6..9), (6..6, 12..15)],
390            }
391        );
392        // delete first, middle, and last lines
393        assert_eq!(
394            split_file_hunks(
395                &[(commit_id1, 0..15)],
396                &Diff::by_line(["1a\n1b\n1c\n1d\n1e\n1f\n", "1b\n1d\n1f\n"])
397            ),
398            hashmap! {
399                commit_id1 => vec![(0..3, 0..0), (6..9, 3..3), (12..15, 6..6)],
400            }
401        );
402        // modify non-contiguous lines
403        assert_eq!(
404            split_file_hunks(
405                &[(commit_id1, 0..12)],
406                &Diff::by_line(["1a\n1b\n1c\n1d\n", "1A\n1b\n1C\n1d\n"])
407            ),
408            hashmap! { commit_id1 => vec![(0..3, 0..3), (6..9, 6..9)] }
409        );
410    }
411
412    #[test]
413    fn test_split_file_hunks_contiguous_ranges_insert() {
414        let commit_id1 = &CommitId::from_hex("111111");
415        let commit_id2 = &CommitId::from_hex("222222");
416
417        // insert first line
418        assert_eq!(
419            split_file_hunks(
420                &[(commit_id1, 0..6), (commit_id2, 6..12)],
421                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1X\n1a\n1b\n2a\n2b\n"])
422            ),
423            hashmap! { commit_id1 => vec![(0..0, 0..3)] }
424        );
425        // insert middle line to first range
426        assert_eq!(
427            split_file_hunks(
428                &[(commit_id1, 0..6), (commit_id2, 6..12)],
429                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1X\n1b\n2a\n2b\n"])
430            ),
431            hashmap! { commit_id1 => vec![(3..3, 3..6)] }
432        );
433        // insert middle line between ranges (ambiguous)
434        assert_eq!(
435            split_file_hunks(
436                &[(commit_id1, 0..6), (commit_id2, 6..12)],
437                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n3X\n2a\n2b\n"])
438            ),
439            hashmap! {}
440        );
441        // insert middle line to second range
442        assert_eq!(
443            split_file_hunks(
444                &[(commit_id1, 0..6), (commit_id2, 6..12)],
445                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2X\n2b\n"])
446            ),
447            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
448        );
449        // insert last line
450        assert_eq!(
451            split_file_hunks(
452                &[(commit_id1, 0..6), (commit_id2, 6..12)],
453                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2b\n2X\n"])
454            ),
455            hashmap! { commit_id2 => vec![(12..12, 12..15)] }
456        );
457    }
458
459    #[test]
460    fn test_split_file_hunks_contiguous_ranges_delete() {
461        let commit_id1 = &CommitId::from_hex("111111");
462        let commit_id2 = &CommitId::from_hex("222222");
463
464        // delete first line
465        assert_eq!(
466            split_file_hunks(
467                &[(commit_id1, 0..6), (commit_id2, 6..12)],
468                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n2b\n"])
469            ),
470            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
471        );
472        // delete middle line from first range
473        assert_eq!(
474            split_file_hunks(
475                &[(commit_id1, 0..6), (commit_id2, 6..12)],
476                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2a\n2b\n"])
477            ),
478            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
479        );
480        // delete middle line from second range
481        assert_eq!(
482            split_file_hunks(
483                &[(commit_id1, 0..6), (commit_id2, 6..12)],
484                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2b\n"])
485            ),
486            hashmap! { commit_id2 => vec![(6..9, 6..6)] }
487        );
488        // delete last line
489        assert_eq!(
490            split_file_hunks(
491                &[(commit_id1, 0..6), (commit_id2, 6..12)],
492                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n"])
493            ),
494            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
495        );
496        // delete first and last lines
497        assert_eq!(
498            split_file_hunks(
499                &[(commit_id1, 0..6), (commit_id2, 6..12)],
500                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n"])
501            ),
502            hashmap! {
503                commit_id1 => vec![(0..3, 0..0)],
504                commit_id2 => vec![(9..12, 6..6)],
505            }
506        );
507
508        // delete across ranges (split first annotation range)
509        assert_eq!(
510            split_file_hunks(
511                &[(commit_id1, 0..6), (commit_id2, 6..12)],
512                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n"])
513            ),
514            hashmap! {
515                commit_id1 => vec![(3..6, 3..3)],
516                commit_id2 => vec![(6..12, 3..3)],
517            }
518        );
519        // delete middle lines across ranges (split both annotation ranges)
520        assert_eq!(
521            split_file_hunks(
522                &[(commit_id1, 0..6), (commit_id2, 6..12)],
523                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2b\n"])
524            ),
525            hashmap! {
526                commit_id1 => vec![(3..6, 3..3)],
527                commit_id2 => vec![(6..9, 3..3)],
528            }
529        );
530        // delete across ranges (split second annotation range)
531        assert_eq!(
532            split_file_hunks(
533                &[(commit_id1, 0..6), (commit_id2, 6..12)],
534                &Diff::by_line(["1a\n1b\n2a\n2b\n", "2b\n"])
535            ),
536            hashmap! {
537                commit_id1 => vec![(0..6, 0..0)],
538                commit_id2 => vec![(6..9, 0..0)],
539            }
540        );
541
542        // delete all
543        assert_eq!(
544            split_file_hunks(
545                &[(commit_id1, 0..6), (commit_id2, 6..12)],
546                &Diff::by_line(["1a\n1b\n2a\n2b\n", ""])
547            ),
548            hashmap! {
549                commit_id1 => vec![(0..6, 0..0)],
550                commit_id2 => vec![(6..12, 0..0)],
551            }
552        );
553    }
554
555    #[test]
556    fn test_split_file_hunks_contiguous_ranges_modify() {
557        let commit_id1 = &CommitId::from_hex("111111");
558        let commit_id2 = &CommitId::from_hex("222222");
559
560        // modify first line
561        assert_eq!(
562            split_file_hunks(
563                &[(commit_id1, 0..6), (commit_id2, 6..12)],
564                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2b\n"])
565            ),
566            hashmap! { commit_id1 => vec![(0..3, 0..3)] }
567        );
568        // modify middle line of first range
569        assert_eq!(
570            split_file_hunks(
571                &[(commit_id1, 0..6), (commit_id2, 6..12)],
572                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2a\n2b\n"])
573            ),
574            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
575        );
576        // modify middle lines of both ranges (ambiguous)
577        // ('hg absorb' accepts this)
578        assert_eq!(
579            split_file_hunks(
580                &[(commit_id1, 0..6), (commit_id2, 6..12)],
581                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2A\n2b\n"])
582            ),
583            hashmap! {}
584        );
585        // modify middle line of second range
586        assert_eq!(
587            split_file_hunks(
588                &[(commit_id1, 0..6), (commit_id2, 6..12)],
589                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2b\n"])
590            ),
591            hashmap! { commit_id2 => vec![(6..9, 6..9)] }
592        );
593        // modify last line
594        assert_eq!(
595            split_file_hunks(
596                &[(commit_id1, 0..6), (commit_id2, 6..12)],
597                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2B\n"])
598            ),
599            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
600        );
601        // modify first and last lines
602        assert_eq!(
603            split_file_hunks(
604                &[(commit_id1, 0..6), (commit_id2, 6..12)],
605                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2B\n"])
606            ),
607            hashmap! {
608                commit_id1 => vec![(0..3, 0..3)],
609                commit_id2 => vec![(9..12, 9..12)],
610            }
611        );
612    }
613
614    #[test]
615    fn test_split_file_hunks_contiguous_ranges_modify_insert() {
616        let commit_id1 = &CommitId::from_hex("111111");
617        let commit_id2 = &CommitId::from_hex("222222");
618
619        // modify first range, insert adjacent middle line
620        assert_eq!(
621            split_file_hunks(
622                &[(commit_id1, 0..6), (commit_id2, 6..12)],
623                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1B\n1X\n2a\n2b\n"])
624            ),
625            hashmap! { commit_id1 => vec![(0..6, 0..9)] }
626        );
627        // modify second range, insert adjacent middle line
628        assert_eq!(
629            split_file_hunks(
630                &[(commit_id1, 0..6), (commit_id2, 6..12)],
631                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2X\n2A\n2B\n"])
632            ),
633            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
634        );
635        // modify second range, insert last line
636        assert_eq!(
637            split_file_hunks(
638                &[(commit_id1, 0..6), (commit_id2, 6..12)],
639                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2B\n2X\n"])
640            ),
641            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
642        );
643        // modify first and last lines (unambiguous), insert middle line between
644        // ranges (ambiguous)
645        assert_eq!(
646            split_file_hunks(
647                &[(commit_id1, 0..6), (commit_id2, 6..12)],
648                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n3X\n2a\n2B\n"])
649            ),
650            hashmap! {
651                commit_id1 => vec![(0..3, 0..3)],
652                commit_id2 => vec![(9..12, 12..15)],
653            }
654        );
655    }
656
657    #[test]
658    fn test_split_file_hunks_contiguous_ranges_modify_delete() {
659        let commit_id1 = &CommitId::from_hex("111111");
660        let commit_id2 = &CommitId::from_hex("222222");
661
662        // modify first line, delete adjacent middle line
663        assert_eq!(
664            split_file_hunks(
665                &[(commit_id1, 0..6), (commit_id2, 6..12)],
666                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2b\n"])
667            ),
668            hashmap! { commit_id1 => vec![(0..6, 0..3)] }
669        );
670        // modify last line, delete adjacent middle line
671        assert_eq!(
672            split_file_hunks(
673                &[(commit_id1, 0..6), (commit_id2, 6..12)],
674                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2B\n"])
675            ),
676            hashmap! { commit_id2 => vec![(6..12, 6..9)] }
677        );
678        // modify first and last lines, delete middle line from first range
679        assert_eq!(
680            split_file_hunks(
681                &[(commit_id1, 0..6), (commit_id2, 6..12)],
682                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2B\n"])
683            ),
684            hashmap! {
685                commit_id1 => vec![(0..6, 0..3)],
686                commit_id2 => vec![(9..12, 6..9)],
687            }
688        );
689        // modify first and last lines, delete middle line from second range
690        assert_eq!(
691            split_file_hunks(
692                &[(commit_id1, 0..6), (commit_id2, 6..12)],
693                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2B\n"])
694            ),
695            hashmap! {
696                commit_id1 => vec![(0..3, 0..3)],
697                commit_id2 => vec![(6..12, 6..9)],
698            }
699        );
700        // modify middle line, delete adjacent middle line (ambiguous)
701        assert_eq!(
702            split_file_hunks(
703                &[(commit_id1, 0..6), (commit_id2, 6..12)],
704                &Diff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2b\n"])
705            ),
706            hashmap! {}
707        );
708    }
709
710    #[test]
711    fn test_split_file_hunks_non_contiguous_ranges_insert() {
712        let commit_id1 = &CommitId::from_hex("111111");
713        let commit_id2 = &CommitId::from_hex("222222");
714
715        // insert middle line to first range
716        assert_eq!(
717            split_file_hunks(
718                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
719                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2a\n2b\n"])
720            ),
721            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
722        );
723        // insert middle line to second range
724        assert_eq!(
725            split_file_hunks(
726                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
727                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2X\n2a\n2b\n"])
728            ),
729            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
730        );
731        // insert middle lines to both ranges
732        assert_eq!(
733            split_file_hunks(
734                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
735                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2X\n2a\n2b\n"])
736            ),
737            hashmap! {
738                commit_id1 => vec![(6..6, 6..9)],
739                commit_id2 => vec![(9..9, 12..15)],
740            }
741        );
742    }
743
744    #[test]
745    fn test_split_file_hunks_non_contiguous_ranges_insert_modify_masked() {
746        let commit_id1 = &CommitId::from_hex("111111");
747        let commit_id2 = &CommitId::from_hex("222222");
748
749        // insert middle line to first range, modify masked line (ambiguous)
750        assert_eq!(
751            split_file_hunks(
752                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
753                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2a\n2b\n"])
754            ),
755            hashmap! {}
756        );
757        // insert middle line to second range, modify masked line (ambiguous)
758        assert_eq!(
759            split_file_hunks(
760                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
761                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2X\n2a\n2b\n"])
762            ),
763            hashmap! {}
764        );
765        // insert middle lines to both ranges, modify masked line (ambiguous)
766        assert_eq!(
767            split_file_hunks(
768                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
769                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2X\n2a\n2b\n"])
770            ),
771            hashmap! {}
772        );
773    }
774
775    #[test]
776    fn test_split_file_hunks_non_contiguous_ranges_delete() {
777        let commit_id1 = &CommitId::from_hex("111111");
778        let commit_id2 = &CommitId::from_hex("222222");
779
780        // delete middle line from first range
781        assert_eq!(
782            split_file_hunks(
783                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
784                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2a\n2b\n"])
785            ),
786            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
787        );
788        // delete middle line from second range
789        assert_eq!(
790            split_file_hunks(
791                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
792                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2b\n"])
793            ),
794            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
795        );
796        // delete middle lines from both ranges
797        assert_eq!(
798            split_file_hunks(
799                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
800                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2b\n"])
801            ),
802            hashmap! {
803                commit_id1 => vec![(3..6, 3..3)],
804                commit_id2 => vec![(9..12, 6..6)],
805            }
806        );
807    }
808
809    #[test]
810    fn test_split_file_hunks_non_contiguous_ranges_delete_modify_masked() {
811        let commit_id1 = &CommitId::from_hex("111111");
812        let commit_id2 = &CommitId::from_hex("222222");
813
814        // delete middle line from first range, modify masked line (ambiguous)
815        assert_eq!(
816            split_file_hunks(
817                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
818                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2a\n2b\n"])
819            ),
820            hashmap! {}
821        );
822        // delete middle line from second range, modify masked line (ambiguous)
823        assert_eq!(
824            split_file_hunks(
825                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
826                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2b\n"])
827            ),
828            hashmap! {}
829        );
830        // delete middle lines from both ranges, modify masked line (ambiguous)
831        assert_eq!(
832            split_file_hunks(
833                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
834                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2b\n"])
835            ),
836            hashmap! {}
837        );
838    }
839
840    #[test]
841    fn test_split_file_hunks_non_contiguous_ranges_delete_delete_masked() {
842        let commit_id1 = &CommitId::from_hex("111111");
843        let commit_id2 = &CommitId::from_hex("222222");
844
845        // 'hg absorb' accepts these, but it seems better to reject them as
846        // ambiguous. Masked lines cannot be deleted.
847
848        // delete middle line from first range, delete masked line (ambiguous)
849        assert_eq!(
850            split_file_hunks(
851                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
852                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2a\n2b\n"])
853            ),
854            hashmap! {}
855        );
856        // delete middle line from second range, delete masked line (ambiguous)
857        assert_eq!(
858            split_file_hunks(
859                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
860                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n2b\n"])
861            ),
862            hashmap! {}
863        );
864        // delete middle lines from both ranges, delete masked line (ambiguous)
865        assert_eq!(
866            split_file_hunks(
867                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
868                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2b\n"])
869            ),
870            hashmap! {}
871        );
872    }
873
874    #[test]
875    fn test_split_file_hunks_non_contiguous_ranges_modify() {
876        let commit_id1 = &CommitId::from_hex("111111");
877        let commit_id2 = &CommitId::from_hex("222222");
878
879        // modify middle line of first range
880        assert_eq!(
881            split_file_hunks(
882                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
883                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2a\n2b\n"])
884            ),
885            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
886        );
887        // modify middle line of second range
888        assert_eq!(
889            split_file_hunks(
890                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
891                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2A\n2b\n"])
892            ),
893            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
894        );
895        // modify middle lines of both ranges
896        assert_eq!(
897            split_file_hunks(
898                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
899                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2A\n2b\n"])
900            ),
901            hashmap! {
902                commit_id1 => vec![(3..6, 3..6)],
903                commit_id2 => vec![(9..12, 9..12)],
904            }
905        );
906    }
907
908    #[test]
909    fn test_split_file_hunks_non_contiguous_ranges_modify_modify_masked() {
910        let commit_id1 = &CommitId::from_hex("111111");
911        let commit_id2 = &CommitId::from_hex("222222");
912
913        // modify middle line of first range, modify masked line (ambiguous)
914        assert_eq!(
915            split_file_hunks(
916                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
917                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2a\n2b\n"])
918            ),
919            hashmap! {}
920        );
921        // modify middle line of second range, modify masked line (ambiguous)
922        assert_eq!(
923            split_file_hunks(
924                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
925                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2A\n2b\n"])
926            ),
927            hashmap! {}
928        );
929        // modify middle lines to both ranges, modify masked line (ambiguous)
930        assert_eq!(
931            split_file_hunks(
932                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
933                &Diff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2A\n2b\n"])
934            ),
935            hashmap! {}
936        );
937    }
938
939    #[test]
940    fn test_split_file_hunks_non_contiguous_tail_range_insert() {
941        let commit_id1 = &CommitId::from_hex("111111");
942
943        // insert middle line to range
944        assert_eq!(
945            split_file_hunks(
946                &[(commit_id1, 0..6) /* , 6..9 */],
947                &Diff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0a\n"])
948            ),
949            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
950        );
951    }
952
953    #[test]
954    fn test_split_file_hunks_non_contiguous_tail_range_insert_modify_masked() {
955        let commit_id1 = &CommitId::from_hex("111111");
956
957        // insert middle line to range, modify masked line (ambiguous)
958        assert_eq!(
959            split_file_hunks(
960                &[(commit_id1, 0..6) /* , 6..9 */],
961                &Diff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0A\n"])
962            ),
963            hashmap! {}
964        );
965    }
966
967    #[test]
968    fn test_split_file_hunks_non_contiguous_tail_range_delete() {
969        let commit_id1 = &CommitId::from_hex("111111");
970
971        // delete middle line from range
972        assert_eq!(
973            split_file_hunks(
974                &[(commit_id1, 0..6) /* , 6..9 */],
975                &Diff::by_line(["1a\n1b\n0a\n", "1a\n0a\n"])
976            ),
977            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
978        );
979        // delete all lines from range
980        assert_eq!(
981            split_file_hunks(
982                &[(commit_id1, 0..6) /* , 6..9 */],
983                &Diff::by_line(["1a\n1b\n0a\n", "0a\n"])
984            ),
985            hashmap! { commit_id1 => vec![(0..6, 0..0)] }
986        );
987    }
988
989    #[test]
990    fn test_split_file_hunks_non_contiguous_tail_range_delete_modify_masked() {
991        let commit_id1 = &CommitId::from_hex("111111");
992
993        // delete middle line from range, modify masked line (ambiguous)
994        assert_eq!(
995            split_file_hunks(
996                &[(commit_id1, 0..6) /* , 6..9 */],
997                &Diff::by_line(["1a\n1b\n0a\n", "1a\n0A\n"])
998            ),
999            hashmap! {}
1000        );
1001        // delete all lines from range, modify masked line (ambiguous)
1002        assert_eq!(
1003            split_file_hunks(
1004                &[(commit_id1, 0..6) /* , 6..9 */],
1005                &Diff::by_line(["1a\n1b\n0a\n", "0A\n"])
1006            ),
1007            hashmap! {}
1008        );
1009    }
1010
1011    #[test]
1012    fn test_split_file_hunks_non_contiguous_tail_range_delete_delete_masked() {
1013        let commit_id1 = &CommitId::from_hex("111111");
1014
1015        // 'hg absorb' accepts these, but it seems better to reject them as
1016        // ambiguous. Masked lines cannot be deleted.
1017
1018        // delete middle line from range, delete masked line (ambiguous)
1019        assert_eq!(
1020            split_file_hunks(
1021                &[(commit_id1, 0..6) /* , 6..9 */],
1022                &Diff::by_line(["1a\n1b\n0a\n", "1a\n"])
1023            ),
1024            hashmap! {}
1025        );
1026        // delete all lines from range, delete masked line (ambiguous)
1027        assert_eq!(
1028            split_file_hunks(
1029                &[(commit_id1, 0..6) /* , 6..9 */],
1030                &Diff::by_line(["1a\n1b\n0a\n", ""])
1031            ),
1032            hashmap! {}
1033        );
1034    }
1035
1036    #[test]
1037    fn test_split_file_hunks_non_contiguous_tail_range_modify() {
1038        let commit_id1 = &CommitId::from_hex("111111");
1039
1040        // modify middle line of range
1041        assert_eq!(
1042            split_file_hunks(
1043                &[(commit_id1, 0..6) /* , 6..9 */],
1044                &Diff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0a\n"])
1045            ),
1046            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
1047        );
1048    }
1049
1050    #[test]
1051    fn test_split_file_hunks_non_contiguous_tail_range_modify_modify_masked() {
1052        let commit_id1 = &CommitId::from_hex("111111");
1053
1054        // modify middle line of range, modify masked line (ambiguous)
1055        assert_eq!(
1056            split_file_hunks(
1057                &[(commit_id1, 0..6) /* , 6..9 */],
1058                &Diff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0A\n"])
1059            ),
1060            hashmap! {}
1061        );
1062    }
1063
1064    #[test]
1065    fn test_split_file_hunks_multiple_edits() {
1066        let commit_id1 = &CommitId::from_hex("111111");
1067        let commit_id2 = &CommitId::from_hex("222222");
1068        let commit_id3 = &CommitId::from_hex("333333");
1069
1070        assert_eq!(
1071            split_file_hunks(
1072                &[
1073                    (commit_id1, 0..3),   // 1a       => 1A
1074                    (commit_id2, 3..6),   // 2a       => 2a
1075                    (commit_id1, 6..15),  // 1b 1c 1d => 1B 1d
1076                    (commit_id3, 15..21), // 3a 3b    => 3X 3A 3b 3Y
1077                ],
1078                &Diff::by_line([
1079                    "1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1080                    "1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n"
1081                ])
1082            ),
1083            hashmap! {
1084                commit_id1 => vec![(0..3, 0..3), (6..12, 6..9)],
1085                commit_id3 => vec![(15..18, 12..18), (21..21, 21..24)],
1086            }
1087        );
1088    }
1089
1090    #[test]
1091    fn test_combine_texts() {
1092        assert_eq!(combine_texts(b"", b"", &[]), "");
1093        assert_eq!(combine_texts(b"foo", b"bar", &[]), "foo");
1094        assert_eq!(combine_texts(b"foo", b"bar", &[(0..3, 0..3)]), "bar");
1095
1096        assert_eq!(
1097            combine_texts(
1098                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1099                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1100                &[(0..3, 0..3), (6..12, 6..9)]
1101            ),
1102            "1A\n2a\n1B\n1d\n3a\n3b\n"
1103        );
1104        assert_eq!(
1105            combine_texts(
1106                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1107                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1108                &[(15..18, 12..18), (21..21, 21..24)]
1109            ),
1110            "1a\n2a\n1b\n1c\n1d\n3X\n3A\n3b\n3Y\n"
1111        );
1112    }
1113}