jj_lib/absorb.rs

// Copyright 2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Algorithm to split the changes in a single source commit and 'absorb' them
//! into their most relevant ancestors.
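//!
//! A minimal end-to-end sketch (a hedged illustration, not a prescribed API;
//! `repo: &dyn Repo`, `mut_repo: &mut MutableRepo`, `commit`, `destinations`,
//! and `matcher` are assumed to be in scope):
//!
//! ```ignore
//! let source = AbsorbSource::from_commit(repo, commit)?;
//! let selected = split_hunks_to_trees(repo, &source, &destinations, &matcher).block_on()?;
//! let stats = absorb_hunks(mut_repo, &source, selected.target_commits)?;
//! ```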

use std::cmp;
use std::collections::HashMap;
use std::ops::Range;
use std::sync::Arc;

use bstr::BString;
use futures::StreamExt as _;
use itertools::Itertools as _;
use pollster::FutureExt as _;
use thiserror::Error;

use crate::annotate::FileAnnotator;
use crate::backend::BackendError;
use crate::backend::BackendResult;
use crate::backend::CommitId;
use crate::backend::TreeValue;
use crate::commit::Commit;
use crate::conflicts::MaterializedFileValue;
use crate::conflicts::MaterializedTreeValue;
use crate::conflicts::materialized_diff_stream;
use crate::copies::CopyRecords;
use crate::diff::ContentDiff;
use crate::diff::DiffHunkKind;
use crate::matchers::Matcher;
use crate::merge::Merge;
use crate::merged_tree::MergedTree;
use crate::merged_tree::MergedTreeBuilder;
use crate::repo::MutableRepo;
use crate::repo::Repo;
use crate::repo_path::RepoPathBuf;
use crate::revset::ResolvedRevsetExpression;
use crate::revset::RevsetEvaluationError;

/// The source commit to absorb into its ancestry.
#[derive(Clone, Debug)]
pub struct AbsorbSource {
    commit: Commit,
    parent_tree: MergedTree,
}

impl AbsorbSource {
    /// Create an absorb source from a single commit.
    pub fn from_commit(repo: &dyn Repo, commit: Commit) -> BackendResult<Self> {
        let parent_tree = commit.parent_tree(repo)?;
        Ok(Self {
            commit,
            parent_tree,
        })
    }
}

/// Error splitting an absorb source into modified ancestry trees.
#[derive(Debug, Error)]
pub enum AbsorbError {
    /// Error while contacting the Backend.
    #[error(transparent)]
    Backend(#[from] BackendError),
    /// Error resolving commit ancestry.
    #[error(transparent)]
    RevsetEvaluation(#[from] RevsetEvaluationError),
}

/// An absorb 'plan' indicating which commits should be modified and what they
/// should be modified to.
#[derive(Default)]
pub struct SelectedTrees {
    /// Commits to be modified, to be passed to `absorb_hunks`.
    pub target_commits: HashMap<CommitId, MergedTreeBuilder>,
    /// Paths that could not be absorbed, along with the reason why.
    pub skipped_paths: Vec<(RepoPathBuf, String)>,
}

/// Builds trees to be merged into destination commits by splitting source
/// changes based on file annotation.
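///
/// A minimal calling sketch (hedged: `repo`, `source`, `destinations`, and
/// `matcher` are placeholders assumed to be in scope, e.g. `matcher` could be
/// an `EverythingMatcher`):
///
/// ```ignore
/// let selected = split_hunks_to_trees(repo, &source, &destinations, &matcher).block_on()?;
/// for (path, reason) in &selected.skipped_paths {
///     eprintln!("skipped {path:?}: {reason}");
/// }
/// ```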
pub async fn split_hunks_to_trees(
    repo: &dyn Repo,
    source: &AbsorbSource,
    destinations: &Arc<ResolvedRevsetExpression>,
    matcher: &dyn Matcher,
) -> Result<SelectedTrees, AbsorbError> {
    let mut selected_trees = SelectedTrees::default();

    let left_tree = &source.parent_tree;
    let right_tree = source.commit.tree();
    // TODO: enable copy tracking if we add support for annotate and merge
    let copy_records = CopyRecords::default();
    let tree_diff = left_tree.diff_stream_with_copies(&right_tree, matcher, &copy_records);
    let mut diff_stream = materialized_diff_stream(repo.store(), tree_diff);
    while let Some(entry) = diff_stream.next().await {
        let left_path = entry.path.source();
        let right_path = entry.path.target();
        let values = entry.values?;
        let (left_text, executable, copy_id) = match to_file_value(values.before) {
            Ok(Some(mut value)) => (
                value.read_all(left_path).await?,
                value.executable,
                value.copy_id,
            ),
            // New file should have no destinations
            Ok(None) => continue,
            Err(reason) => {
                selected_trees
                    .skipped_paths
                    .push((left_path.to_owned(), reason));
                continue;
            }
        };
        let (right_text, deleted) = match to_file_value(values.after) {
            Ok(Some(mut value)) => (value.read_all(right_path).await?, false),
            Ok(None) => (vec![], true),
            Err(reason) => {
                selected_trees
                    .skipped_paths
                    .push((right_path.to_owned(), reason));
                continue;
            }
        };

        // Compute annotation of parent (= left) content to map right hunks
        let mut annotator =
            FileAnnotator::with_file_content(source.commit.id(), left_path, left_text.clone());
        annotator.compute(repo, destinations)?;
        let annotation = annotator.to_annotation();
        let annotation_ranges = annotation
            .compact_line_ranges()
            .filter_map(|(commit_id, range)| Some((commit_id.ok()?, range)))
            .collect_vec();
        let diff = ContentDiff::by_line([&left_text, &right_text]);
        let selected_ranges = split_file_hunks(&annotation_ranges, &diff);
        // Build trees containing parent (= left) contents + selected hunks
        for (&commit_id, ranges) in &selected_ranges {
            let tree_builder = selected_trees
                .target_commits
                .entry(commit_id.clone())
                .or_insert_with(|| MergedTreeBuilder::new(left_tree.clone()));
            let new_text = combine_texts(&left_text, &right_text, ranges);
            // Since changes to be absorbed are represented as diffs relative to
            // the source parent, we can propagate file deletion only if the
            // whole file content is deleted at a single destination commit.
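            // E.g. if a deleted file's lines are annotated to two different
            // destinations, neither gets `Merge::absent()` here: each
            // destination only drops its own lines, so `new_text` stays
            // non-empty and the file itself survives in that tree.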
            let new_tree_value = if new_text.is_empty() && deleted {
                Merge::absent()
            } else {
                let id = repo
                    .store()
                    .write_file(left_path, &mut new_text.as_slice())
                    .await?;
                Merge::normal(TreeValue::File {
                    id,
                    executable,
                    copy_id: copy_id.clone(),
                })
            };
            tree_builder.set_or_remove(left_path.to_owned(), new_tree_value);
        }
    }

    Ok(selected_trees)
}

type SelectedRange = (Range<usize>, Range<usize>);

/// Maps `diff` hunks to commits based on the left `annotation_ranges`. The
/// `annotation_ranges` should be compacted.
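///
/// A small illustration (mirroring the tests below): a single modified line
/// whose left range is annotated to one commit maps straight to that commit.
///
/// ```ignore
/// let diff = ContentDiff::by_line(["1a\n", "1AA\n"]);
/// let selected = split_file_hunks(&[(commit_id1, 0..3)], &diff);
/// // selected == {commit_id1: [(0..3, 0..4)]}
/// ```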
fn split_file_hunks<'a>(
    mut annotation_ranges: &[(&'a CommitId, Range<usize>)],
    diff: &ContentDiff,
) -> HashMap<&'a CommitId, Vec<SelectedRange>> {
    debug_assert!(annotation_ranges.iter().all(|(_, range)| !range.is_empty()));
    let mut selected_ranges: HashMap<&CommitId, Vec<_>> = HashMap::new();
    let mut diff_hunk_ranges = diff
        .hunk_ranges()
        .filter(|hunk| hunk.kind == DiffHunkKind::Different);
    while !annotation_ranges.is_empty() {
        let Some(hunk) = diff_hunk_ranges.next() else {
            break;
        };
        let [left_range, right_range]: &[_; 2] = hunk.ranges[..].try_into().unwrap();
        assert!(!left_range.is_empty() || !right_range.is_empty());
        if right_range.is_empty() {
            // If the hunk is pure deletion, it can be mapped to multiple
            // overlapped annotation ranges unambiguously.
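            // E.g. deleting all of "1a\n1b\n2a\n2b\n" where 0..6 and 6..12 are
            // annotated to different commits yields one deletion per commit.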
            let skip = annotation_ranges
                .iter()
                .take_while(|(_, range)| range.end <= left_range.start)
                .count();
            annotation_ranges = &annotation_ranges[skip..];
            let pre_overlap = annotation_ranges
                .iter()
                .take_while(|(_, range)| range.end < left_range.end)
                .count();
            let maybe_overlapped_ranges = annotation_ranges.get(..pre_overlap + 1);
            annotation_ranges = &annotation_ranges[pre_overlap..];
            let Some(overlapped_ranges) = maybe_overlapped_ranges else {
                continue;
            };
            // Ensure that the ranges are contiguous and include the start.
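            // E.g. a deletion spanning the unannotated gap between two ranges
            // is not fully covered, so it is skipped.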
            let all_covered = overlapped_ranges
                .iter()
                .try_fold(left_range.start, |prev_end, (_, cur)| {
                    (cur.start <= prev_end).then_some(cur.end)
                })
                .inspect(|&last_end| assert!(left_range.end <= last_end))
                .is_some();
            if all_covered {
                for (commit_id, cur_range) in overlapped_ranges {
                    let start = cmp::max(cur_range.start, left_range.start);
                    let end = cmp::min(cur_range.end, left_range.end);
                    assert!(start < end);
                    let selected = selected_ranges.entry(commit_id).or_default();
                    selected.push((start..end, right_range.clone()));
                }
            }
        } else {
            // In other cases, the hunk should be included in an annotation
            // range to map it unambiguously. Skip any pre-overlapped ranges.
            let skip = annotation_ranges
                .iter()
                .take_while(|(_, range)| range.end < left_range.end)
                .count();
            annotation_ranges = &annotation_ranges[skip..];
            let Some((commit_id, cur_range)) = annotation_ranges.first() else {
                continue;
            };
            let contained = cur_range.start <= left_range.start && left_range.end <= cur_range.end;
            // If the hunk is pure insertion, it can be mapped to two distinct
            // annotation ranges, which is ambiguous.
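            // E.g. a line inserted exactly at the boundary between two
            // annotation ranges could belong to either commit, so it is
            // skipped rather than guessed.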
            let ambiguous = cur_range.end == left_range.start
                && annotation_ranges
                    .get(1)
                    .is_some_and(|(_, next_range)| next_range.start == left_range.end);
            if contained && !ambiguous {
                let selected = selected_ranges.entry(commit_id).or_default();
                selected.push((left_range.clone(), right_range.clone()));
            }
        }
    }
    selected_ranges
}

/// Constructs new text by replacing the `text1` range with the `text2` range
/// for each selected `(range1, range2)` pair.
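///
/// E.g. (mirroring the tests below) selecting the whole range replaces the
/// text, while an empty selection keeps `text1` unchanged:
///
/// ```ignore
/// assert_eq!(combine_texts(b"foo", b"bar", &[(0..3, 0..3)]), "bar");
/// assert_eq!(combine_texts(b"foo", b"bar", &[]), "foo");
/// ```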
fn combine_texts(text1: &[u8], text2: &[u8], selected_ranges: &[SelectedRange]) -> BString {
    itertools::chain!(
        [(0..0, 0..0)],
        selected_ranges.iter().cloned(),
        [(text1.len()..text1.len(), text2.len()..text2.len())],
    )
    .tuple_windows()
    // Copy unchanged hunk from text1 and current hunk from text2
    .map(|((prev1, _), (cur1, cur2))| (prev1.end..cur1.start, cur2))
    .flat_map(|(range1, range2)| [&text1[range1], &text2[range2]])
    .collect()
}

/// Describes changes made by [`absorb_hunks()`].
#[derive(Clone, Debug)]
pub struct AbsorbStats {
    /// Rewritten source commit from which the absorbed hunks were removed, or
    /// `None` if the source commit was abandoned or no hunks were moved.
    pub rewritten_source: Option<Commit>,
    /// Rewritten commits which the source hunks were absorbed into, in forward
    /// topological order.
    pub rewritten_destinations: Vec<Commit>,
    /// Number of descendant commits which were rebased. Rewritten destination
    /// commits are not included in this count.
    pub num_rebased: usize,
}

/// Merges selected trees into the specified commits. Abandons the source commit
/// if it becomes discardable.
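///
/// A hedged calling sketch (assuming `mut_repo` is the `MutableRepo` of an
/// open transaction and `selected` came from [`split_hunks_to_trees()`]):
///
/// ```ignore
/// let stats = absorb_hunks(mut_repo, &source, selected.target_commits)?;
/// println!("absorbed changes into {} commits", stats.rewritten_destinations.len());
/// ```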
pub fn absorb_hunks(
    repo: &mut MutableRepo,
    source: &AbsorbSource,
    mut selected_trees: HashMap<CommitId, MergedTreeBuilder>,
) -> BackendResult<AbsorbStats> {
    let mut rewritten_source = None;
    let mut rewritten_destinations = Vec::new();
    let mut num_rebased = 0;
    // Rewrite commits in topological order so that descendant commits wouldn't
    // be rewritten multiple times.
    repo.transform_descendants(selected_trees.keys().cloned().collect(), async |rewriter| {
        // Remove selected hunks from the source commit by reparent()
        if rewriter.old_commit().id() == source.commit.id() {
            let commit_builder = rewriter.reparent();
            if commit_builder.is_discardable()? {
                commit_builder.abandon();
            } else {
                rewritten_source = Some(commit_builder.write()?);
                num_rebased += 1;
            }
            return Ok(());
        }
        let Some(tree_builder) = selected_trees.remove(rewriter.old_commit().id()) else {
            rewriter.rebase().await?.write()?;
            num_rebased += 1;
            return Ok(());
        };
        // Merge hunks between source parent tree and selected tree
        let selected_tree = tree_builder.write_tree()?;
        let commit_builder = rewriter.rebase().await?;
        let destination_tree = commit_builder.tree();
        let new_tree = destination_tree
            .merge(source.parent_tree.clone(), selected_tree)
            .block_on()?;
        let mut predecessors = commit_builder.predecessors().to_vec();
        predecessors.push(source.commit.id().clone());
        let new_commit = commit_builder
            .set_tree(new_tree)
            .set_predecessors(predecessors)
            .write()?;
        rewritten_destinations.push(new_commit);
        Ok(())
    })?;
    Ok(AbsorbStats {
        rewritten_source,
        rewritten_destinations,
        num_rebased,
    })
}

fn to_file_value(value: MaterializedTreeValue) -> Result<Option<MaterializedFileValue>, String> {
    match value {
        MaterializedTreeValue::Absent => Ok(None), // New or deleted file
        MaterializedTreeValue::AccessDenied(err) => Err(format!("Access is denied: {err}")),
        MaterializedTreeValue::File(file) => Ok(Some(file)),
        MaterializedTreeValue::Symlink { .. } => Err("Is a symlink".into()),
        MaterializedTreeValue::FileConflict(_) | MaterializedTreeValue::OtherConflict { .. } => {
            Err("Is a conflict".into())
        }
        MaterializedTreeValue::GitSubmodule(_) => Err("Is a Git submodule".into()),
        MaterializedTreeValue::Tree(_) => panic!("diff should not contain trees"),
    }
}

#[cfg(test)]
mod tests {
    use maplit::hashmap;

    use super::*;

    #[test]
    fn test_split_file_hunks_empty_or_single_line() {
        let commit_id1 = &CommitId::from_hex("111111");

        // unchanged
        assert_eq!(
            split_file_hunks(&[], &ContentDiff::by_line(["", ""])),
            hashmap! {}
        );

        // insert single line
        assert_eq!(
            split_file_hunks(&[], &ContentDiff::by_line(["", "2X\n"])),
            hashmap! {}
        );
        // delete single line
        assert_eq!(
            split_file_hunks(&[(commit_id1, 0..3)], &ContentDiff::by_line(["1a\n", ""])),
            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
        );
        // modify single line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..3)],
                &ContentDiff::by_line(["1a\n", "1AA\n"])
            ),
            hashmap! { commit_id1 => vec![(0..3, 0..4)] }
        );
    }

    #[test]
    fn test_split_file_hunks_single_range() {
        let commit_id1 = &CommitId::from_hex("111111");

        // insert first, middle, and last lines
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6)],
                &ContentDiff::by_line(["1a\n1b\n", "1X\n1a\n1Y\n1b\n1Z\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..0, 0..3), (3..3, 6..9), (6..6, 12..15)],
            }
        );
        // delete first, middle, and last lines
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..15)],
                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n1e\n1f\n", "1b\n1d\n1f\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..3, 0..0), (6..9, 3..3), (12..15, 6..6)],
            }
        );
        // modify non-contiguous lines
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..12)],
                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n", "1A\n1b\n1C\n1d\n"])
            ),
            hashmap! { commit_id1 => vec![(0..3, 0..3), (6..9, 6..9)] }
        );
    }

    #[test]
    fn test_split_file_hunks_contiguous_ranges_insert() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // insert first line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1X\n1a\n1b\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(0..0, 0..3)] }
        );
        // insert middle line to first range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1X\n1b\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(3..3, 3..6)] }
        );
        // insert middle line between ranges (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n3X\n2a\n2b\n"])
            ),
            hashmap! {}
        );
        // insert middle line to second range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2X\n2b\n"])
            ),
            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
        );
        // insert last line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2b\n2X\n"])
            ),
            hashmap! { commit_id2 => vec![(12..12, 12..15)] }
        );
    }

    #[test]
    fn test_split_file_hunks_contiguous_ranges_delete() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // delete first line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
        );
        // delete middle line from first range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
        );
        // delete middle line from second range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2b\n"])
            ),
            hashmap! { commit_id2 => vec![(6..9, 6..6)] }
        );
        // delete last line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n"])
            ),
            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
        );
        // delete first and last lines
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..3, 0..0)],
                commit_id2 => vec![(9..12, 6..6)],
            }
        );

        // delete across ranges (split first annotation range)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n"])
            ),
            hashmap! {
                commit_id1 => vec![(3..6, 3..3)],
                commit_id2 => vec![(6..12, 3..3)],
            }
        );
        // delete middle lines across ranges (split both annotation ranges)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2b\n"])
            ),
            hashmap! {
                commit_id1 => vec![(3..6, 3..3)],
                commit_id2 => vec![(6..9, 3..3)],
            }
        );
        // delete across ranges (split second annotation range)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "2b\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..6, 0..0)],
                commit_id2 => vec![(6..9, 0..0)],
            }
        );

        // delete all
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", ""])
            ),
            hashmap! {
                commit_id1 => vec![(0..6, 0..0)],
                commit_id2 => vec![(6..12, 0..0)],
            }
        );
    }

    #[test]
    fn test_split_file_hunks_contiguous_ranges_modify() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // modify first line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(0..3, 0..3)] }
        );
        // modify middle line of first range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
        );
        // modify middle lines of both ranges (ambiguous)
        // ('hg absorb' accepts this)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2A\n2b\n"])
            ),
            hashmap! {}
        );
        // modify middle line of second range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2b\n"])
            ),
            hashmap! { commit_id2 => vec![(6..9, 6..9)] }
        );
        // modify last line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2B\n"])
            ),
            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
        );
        // modify first and last lines
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2B\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..3, 0..3)],
                commit_id2 => vec![(9..12, 9..12)],
            }
        );
    }

    #[test]
    fn test_split_file_hunks_contiguous_ranges_modify_insert() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // modify first range, insert adjacent middle line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1B\n1X\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(0..6, 0..9)] }
        );
        // modify second range, insert adjacent middle line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2X\n2A\n2B\n"])
            ),
            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
        );
        // modify second range, insert last line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2B\n2X\n"])
            ),
            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
        );
        // modify first and last lines (unambiguous), insert middle line between
        // ranges (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n3X\n2a\n2B\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..3, 0..3)],
                commit_id2 => vec![(9..12, 12..15)],
            }
        );
    }

    #[test]
    fn test_split_file_hunks_contiguous_ranges_modify_delete() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // modify first line, delete adjacent middle line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(0..6, 0..3)] }
        );
        // modify last line, delete adjacent middle line
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2B\n"])
            ),
            hashmap! { commit_id2 => vec![(6..12, 6..9)] }
        );
        // modify first and last lines, delete middle line from first range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2B\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..6, 0..3)],
                commit_id2 => vec![(9..12, 6..9)],
            }
        );
        // modify first and last lines, delete middle line from second range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2B\n"])
            ),
            hashmap! {
                commit_id1 => vec![(0..3, 0..3)],
                commit_id2 => vec![(6..12, 6..9)],
            }
        );
        // modify middle line, delete adjacent middle line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), (commit_id2, 6..12)],
                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2b\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_ranges_insert() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // insert middle line to first range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
        );
        // insert middle line to second range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2X\n2a\n2b\n"])
            ),
            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
        );
        // insert middle lines to both ranges
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2X\n2a\n2b\n"])
            ),
            hashmap! {
                commit_id1 => vec![(6..6, 6..9)],
                commit_id2 => vec![(9..9, 12..15)],
            }
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_ranges_insert_modify_masked() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // insert middle line to first range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2a\n2b\n"])
            ),
            hashmap! {}
        );
        // insert middle line to second range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2X\n2a\n2b\n"])
            ),
            hashmap! {}
        );
        // insert middle lines to both ranges, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2X\n2a\n2b\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_ranges_delete() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // delete middle line from first range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
        );
        // delete middle line from second range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2b\n"])
            ),
            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
        );
        // delete middle lines from both ranges
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2b\n"])
            ),
            hashmap! {
                commit_id1 => vec![(3..6, 3..3)],
                commit_id2 => vec![(9..12, 6..6)],
            }
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_ranges_delete_modify_masked() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // delete middle line from first range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2a\n2b\n"])
            ),
            hashmap! {}
        );
        // delete middle line from second range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2b\n"])
            ),
            hashmap! {}
        );
        // delete middle lines from both ranges, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2b\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_ranges_delete_delete_masked() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // 'hg absorb' accepts these, but it seems better to reject them as
        // ambiguous. Masked lines cannot be deleted.

        // delete middle line from first range, delete masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2a\n2b\n"])
            ),
            hashmap! {}
        );
        // delete middle line from second range, delete masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n2b\n"])
            ),
            hashmap! {}
        );
        // delete middle lines from both ranges, delete masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2b\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_ranges_modify() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // modify middle line of first range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2a\n2b\n"])
            ),
            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
        );
        // modify middle line of second range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2A\n2b\n"])
            ),
            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
        );
        // modify middle lines of both ranges
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2A\n2b\n"])
            ),
            hashmap! {
                commit_id1 => vec![(3..6, 3..6)],
                commit_id2 => vec![(9..12, 9..12)],
            }
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_ranges_modify_modify_masked() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");

        // modify middle line of first range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2a\n2b\n"])
            ),
            hashmap! {}
        );
        // modify middle line of second range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2A\n2b\n"])
            ),
            hashmap! {}
        );
        // modify middle lines of both ranges, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2A\n2b\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_tail_range_insert() {
        let commit_id1 = &CommitId::from_hex("111111");

        // insert middle line to range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0a\n"])
            ),
            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_tail_range_insert_modify_masked() {
        let commit_id1 = &CommitId::from_hex("111111");

        // insert middle line to range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0A\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_tail_range_delete() {
        let commit_id1 = &CommitId::from_hex("111111");

        // delete middle line from range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0a\n"])
            ),
            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
        );
        // delete all lines from range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "0a\n"])
            ),
            hashmap! { commit_id1 => vec![(0..6, 0..0)] }
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_tail_range_delete_modify_masked() {
        let commit_id1 = &CommitId::from_hex("111111");

        // delete middle line from range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0A\n"])
            ),
            hashmap! {}
        );
        // delete all lines from range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "0A\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_tail_range_delete_delete_masked() {
        let commit_id1 = &CommitId::from_hex("111111");

        // 'hg absorb' accepts these, but it seems better to reject them as
        // ambiguous. Masked lines cannot be deleted.

        // delete middle line from range, delete masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n"])
            ),
            hashmap! {}
        );
        // delete all lines from range, delete masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", ""])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_tail_range_modify() {
        let commit_id1 = &CommitId::from_hex("111111");

        // modify middle line of range
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0a\n"])
            ),
            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
        );
    }

    #[test]
    fn test_split_file_hunks_non_contiguous_tail_range_modify_modify_masked() {
        let commit_id1 = &CommitId::from_hex("111111");

        // modify middle line of range, modify masked line (ambiguous)
        assert_eq!(
            split_file_hunks(
                &[(commit_id1, 0..6) /* , 6..9 */],
                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0A\n"])
            ),
            hashmap! {}
        );
    }

    #[test]
    fn test_split_file_hunks_multiple_edits() {
        let commit_id1 = &CommitId::from_hex("111111");
        let commit_id2 = &CommitId::from_hex("222222");
        let commit_id3 = &CommitId::from_hex("333333");

        assert_eq!(
            split_file_hunks(
                &[
                    (commit_id1, 0..3),   // 1a       => 1A
                    (commit_id2, 3..6),   // 2a       => 2a
                    (commit_id1, 6..15),  // 1b 1c 1d => 1B 1d
                    (commit_id3, 15..21), // 3a 3b    => 3X 3A 3b 3Y
                ],
                &ContentDiff::by_line([
                    "1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
                    "1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n"
                ])
            ),
            hashmap! {
                commit_id1 => vec![(0..3, 0..3), (6..12, 6..9)],
                commit_id3 => vec![(15..18, 12..18), (21..21, 21..24)],
            }
        );
    }

    #[test]
    fn test_combine_texts() {
        assert_eq!(combine_texts(b"", b"", &[]), "");
        assert_eq!(combine_texts(b"foo", b"bar", &[]), "foo");
        assert_eq!(combine_texts(b"foo", b"bar", &[(0..3, 0..3)]), "bar");

        assert_eq!(
            combine_texts(
                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
                &[(0..3, 0..3), (6..12, 6..9)]
            ),
            "1A\n2a\n1B\n1d\n3a\n3b\n"
        );
        assert_eq!(
            combine_texts(
                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
                &[(15..18, 12..18), (21..21, 21..24)]
            ),
            "1a\n2a\n1b\n1c\n1d\n3X\n3A\n3b\n3Y\n"
        );
    }
}