jj_lib/
absorb.rs

// Copyright 2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Algorithm to split changes in a single source commit into its most relevant
//! ancestors, 'absorbing' them away.
17
18use std::cmp;
19use std::collections::HashMap;
20use std::ops::Range;
21use std::sync::Arc;
22
23use bstr::BString;
24use futures::StreamExt as _;
25use itertools::Itertools as _;
26use pollster::FutureExt as _;
27use thiserror::Error;
28
29use crate::annotate::FileAnnotator;
30use crate::backend::BackendError;
31use crate::backend::BackendResult;
32use crate::backend::CommitId;
33use crate::backend::TreeValue;
34use crate::commit::Commit;
35use crate::conflicts::MaterializedFileValue;
36use crate::conflicts::MaterializedTreeValue;
37use crate::conflicts::materialized_diff_stream;
38use crate::copies::CopyRecords;
39use crate::diff::ContentDiff;
40use crate::diff::DiffHunkKind;
41use crate::matchers::Matcher;
42use crate::merge::Merge;
43use crate::merged_tree::MergedTree;
44use crate::merged_tree::MergedTreeBuilder;
45use crate::repo::MutableRepo;
46use crate::repo::Repo;
47use crate::repo_path::RepoPathBuf;
48use crate::revset::ResolvedRevsetExpression;
49use crate::revset::RevsetEvaluationError;
50
51/// The source commit to absorb into its ancestry.
52#[derive(Clone, Debug)]
53pub struct AbsorbSource {
54    commit: Commit,
55    parent_tree: MergedTree,
56}
57
58impl AbsorbSource {
59    /// Create an absorb source from a single commit.
60    pub fn from_commit(repo: &dyn Repo, commit: Commit) -> BackendResult<Self> {
61        let parent_tree = commit.parent_tree(repo)?;
62        Ok(Self {
63            commit,
64            parent_tree,
65        })
66    }
67}
68
69/// Error splitting an absorb source into modified ancestry trees.
70#[derive(Debug, Error)]
71pub enum AbsorbError {
72    /// Error while contacting the Backend.
73    #[error(transparent)]
74    Backend(#[from] BackendError),
75    /// Error resolving commit ancestry.
76    #[error(transparent)]
77    RevsetEvaluation(#[from] RevsetEvaluationError),
78}
79
80/// An absorb 'plan' indicating which commits should be modified and what they
81/// should be modified to.
82#[derive(Default)]
83pub struct SelectedTrees {
84    /// Commits to be modified, to be passed to `absorb_hunks`.
85    pub target_commits: HashMap<CommitId, MergedTreeBuilder>,
86    /// Paths that were not absorbed for various error reasons.
87    pub skipped_paths: Vec<(RepoPathBuf, String)>,
88}
89
90/// Builds trees to be merged into destination commits by splitting source
91/// changes based on file annotation.
92pub async fn split_hunks_to_trees(
93    repo: &dyn Repo,
94    source: &AbsorbSource,
95    destinations: &Arc<ResolvedRevsetExpression>,
96    matcher: &dyn Matcher,
97) -> Result<SelectedTrees, AbsorbError> {
98    let mut selected_trees = SelectedTrees::default();
99
100    let left_tree = &source.parent_tree;
101    let right_tree = source.commit.tree_async().await?;
102    // TODO: enable copy tracking if we add support for annotate and merge
103    let copy_records = CopyRecords::default();
104    let tree_diff = left_tree.diff_stream_with_copies(&right_tree, matcher, &copy_records);
105    let mut diff_stream = materialized_diff_stream(repo.store(), tree_diff);
106    while let Some(entry) = diff_stream.next().await {
107        let left_path = entry.path.source();
108        let right_path = entry.path.target();
109        let (left_value, right_value) = entry.values?;
110        let (left_text, executable, copy_id) = match to_file_value(left_value) {
111            Ok(Some(mut value)) => (
112                value.read_all(left_path).await?,
113                value.executable,
114                value.copy_id,
115            ),
116            // New file should have no destinations
117            Ok(None) => continue,
118            Err(reason) => {
119                selected_trees
120                    .skipped_paths
121                    .push((left_path.to_owned(), reason));
122                continue;
123            }
124        };
125        let (right_text, deleted) = match to_file_value(right_value) {
126            Ok(Some(mut value)) => (value.read_all(right_path).await?, false),
127            Ok(None) => (vec![], true),
128            Err(reason) => {
129                selected_trees
130                    .skipped_paths
131                    .push((right_path.to_owned(), reason));
132                continue;
133            }
134        };
135
136        // Compute annotation of parent (= left) content to map right hunks
137        let mut annotator =
138            FileAnnotator::with_file_content(source.commit.id(), left_path, left_text.clone());
139        annotator.compute(repo, destinations)?;
140        let annotation = annotator.to_annotation();
141        let annotation_ranges = annotation
142            .compact_line_ranges()
143            .filter_map(|(commit_id, range)| Some((commit_id.ok()?, range)))
144            .collect_vec();
145        let diff = ContentDiff::by_line([&left_text, &right_text]);
146        let selected_ranges = split_file_hunks(&annotation_ranges, &diff);
147        // Build trees containing parent (= left) contents + selected hunks
148        for (&commit_id, ranges) in &selected_ranges {
149            let tree_builder = selected_trees
150                .target_commits
151                .entry(commit_id.clone())
152                .or_insert_with(|| MergedTreeBuilder::new(left_tree.id().clone()));
153            let new_text = combine_texts(&left_text, &right_text, ranges);
154            // Since changes to be absorbed are represented as diffs relative to
155            // the source parent, we can propagate file deletion only if the
156            // whole file content is deleted at a single destination commit.
157            let new_tree_value = if new_text.is_empty() && deleted {
158                Merge::absent()
159            } else {
160                let id = repo
161                    .store()
162                    .write_file(left_path, &mut new_text.as_slice())
163                    .await?;
164                Merge::normal(TreeValue::File {
165                    id,
166                    executable,
167                    copy_id: copy_id.clone(),
168                })
169            };
170            tree_builder.set_or_remove(left_path.to_owned(), new_tree_value);
171        }
172    }
173
174    Ok(selected_trees)
175}
176
/// A selected hunk: the byte range in the left text paired with the byte range
/// in the right text that replaces it.
type SelectedRange = (Range<usize>, Range<usize>);
178
179/// Maps `diff` hunks to commits based on the left `annotation_ranges`. The
180/// `annotation_ranges` should be compacted.
181fn split_file_hunks<'a>(
182    mut annotation_ranges: &[(&'a CommitId, Range<usize>)],
183    diff: &ContentDiff,
184) -> HashMap<&'a CommitId, Vec<SelectedRange>> {
185    debug_assert!(annotation_ranges.iter().all(|(_, range)| !range.is_empty()));
186    let mut selected_ranges: HashMap<&CommitId, Vec<_>> = HashMap::new();
187    let mut diff_hunk_ranges = diff
188        .hunk_ranges()
189        .filter(|hunk| hunk.kind == DiffHunkKind::Different);
190    while !annotation_ranges.is_empty() {
191        let Some(hunk) = diff_hunk_ranges.next() else {
192            break;
193        };
194        let [left_range, right_range]: &[_; 2] = hunk.ranges[..].try_into().unwrap();
195        assert!(!left_range.is_empty() || !right_range.is_empty());
196        if right_range.is_empty() {
197            // If the hunk is pure deletion, it can be mapped to multiple
198            // overlapped annotation ranges unambiguously.
199            let skip = annotation_ranges
200                .iter()
201                .take_while(|(_, range)| range.end <= left_range.start)
202                .count();
203            annotation_ranges = &annotation_ranges[skip..];
204            let pre_overlap = annotation_ranges
205                .iter()
206                .take_while(|(_, range)| range.end < left_range.end)
207                .count();
208            let maybe_overlapped_ranges = annotation_ranges.get(..pre_overlap + 1);
209            annotation_ranges = &annotation_ranges[pre_overlap..];
210            let Some(overlapped_ranges) = maybe_overlapped_ranges else {
211                continue;
212            };
213            // Ensure that the ranges are contiguous and include the start.
214            let all_covered = overlapped_ranges
215                .iter()
216                .try_fold(left_range.start, |prev_end, (_, cur)| {
217                    (cur.start <= prev_end).then_some(cur.end)
218                })
219                .inspect(|&last_end| assert!(left_range.end <= last_end))
220                .is_some();
221            if all_covered {
222                for (commit_id, cur_range) in overlapped_ranges {
223                    let start = cmp::max(cur_range.start, left_range.start);
224                    let end = cmp::min(cur_range.end, left_range.end);
225                    assert!(start < end);
226                    let selected = selected_ranges.entry(commit_id).or_default();
227                    selected.push((start..end, right_range.clone()));
228                }
229            }
230        } else {
231            // In other cases, the hunk should be included in an annotation
232            // range to map it unambiguously. Skip any pre-overlapped ranges.
233            let skip = annotation_ranges
234                .iter()
235                .take_while(|(_, range)| range.end < left_range.end)
236                .count();
237            annotation_ranges = &annotation_ranges[skip..];
238            let Some((commit_id, cur_range)) = annotation_ranges.first() else {
239                continue;
240            };
241            let contained = cur_range.start <= left_range.start && left_range.end <= cur_range.end;
242            // If the hunk is pure insertion, it can be mapped to two distinct
243            // annotation ranges, which is ambiguous.
244            let ambiguous = cur_range.end == left_range.start
245                && annotation_ranges
246                    .get(1)
247                    .is_some_and(|(_, next_range)| next_range.start == left_range.end);
248            if contained && !ambiguous {
249                let selected = selected_ranges.entry(commit_id).or_default();
250                selected.push((left_range.clone(), right_range.clone()));
251            }
252        }
253    }
254    selected_ranges
255}
256
257/// Constructs new text by replacing `text1` range with `text2` range for each
258/// selected `(range1, range2)` pairs.
259fn combine_texts(text1: &[u8], text2: &[u8], selected_ranges: &[SelectedRange]) -> BString {
260    itertools::chain!(
261        [(0..0, 0..0)],
262        selected_ranges.iter().cloned(),
263        [(text1.len()..text1.len(), text2.len()..text2.len())],
264    )
265    .tuple_windows()
266    // Copy unchanged hunk from text1 and current hunk from text2
267    .map(|((prev1, _), (cur1, cur2))| (prev1.end..cur1.start, cur2))
268    .flat_map(|(range1, range2)| [&text1[range1], &text2[range2]])
269    .collect()
270}
271
272/// Describes changes made by [`absorb_hunks()`].
273#[derive(Clone, Debug)]
274pub struct AbsorbStats {
275    /// Rewritten source commit which the absorbed hunks were removed, or `None`
276    /// if the source commit was abandoned or no hunks were moved.
277    pub rewritten_source: Option<Commit>,
278    /// Rewritten commits which the source hunks were absorbed into, in forward
279    /// topological order.
280    pub rewritten_destinations: Vec<Commit>,
281    /// Number of descendant commits which were rebased. The number of rewritten
282    /// destination commits are not included.
283    pub num_rebased: usize,
284}
285
286/// Merges selected trees into the specified commits. Abandons the source commit
287/// if it becomes discardable.
288pub fn absorb_hunks(
289    repo: &mut MutableRepo,
290    source: &AbsorbSource,
291    mut selected_trees: HashMap<CommitId, MergedTreeBuilder>,
292) -> BackendResult<AbsorbStats> {
293    let store = repo.store().clone();
294    let mut rewritten_source = None;
295    let mut rewritten_destinations = Vec::new();
296    let mut num_rebased = 0;
297    // Rewrite commits in topological order so that descendant commits wouldn't
298    // be rewritten multiple times.
299    repo.transform_descendants(selected_trees.keys().cloned().collect(), async |rewriter| {
300        // Remove selected hunks from the source commit by reparent()
301        if rewriter.old_commit().id() == source.commit.id() {
302            let commit_builder = rewriter.reparent();
303            if commit_builder.is_discardable()? {
304                commit_builder.abandon();
305            } else {
306                rewritten_source = Some(commit_builder.write()?);
307                num_rebased += 1;
308            }
309            return Ok(());
310        }
311        let Some(tree_builder) = selected_trees.remove(rewriter.old_commit().id()) else {
312            rewriter.rebase().await?.write()?;
313            num_rebased += 1;
314            return Ok(());
315        };
316        // Merge hunks between source parent tree and selected tree
317        let selected_tree_id = tree_builder.write_tree(&store)?;
318        let commit_builder = rewriter.rebase().await?;
319        let destination_tree = store.get_root_tree(commit_builder.tree_id())?;
320        let selected_tree = store.get_root_tree(&selected_tree_id)?;
321        let new_tree = destination_tree
322            .merge(source.parent_tree.clone(), selected_tree)
323            .block_on()?;
324        let mut predecessors = commit_builder.predecessors().to_vec();
325        predecessors.push(source.commit.id().clone());
326        let new_commit = commit_builder
327            .set_tree_id(new_tree.id())
328            .set_predecessors(predecessors)
329            .write()?;
330        rewritten_destinations.push(new_commit);
331        Ok(())
332    })?;
333    Ok(AbsorbStats {
334        rewritten_source,
335        rewritten_destinations,
336        num_rebased,
337    })
338}
339
340fn to_file_value(value: MaterializedTreeValue) -> Result<Option<MaterializedFileValue>, String> {
341    match value {
342        MaterializedTreeValue::Absent => Ok(None), // New or deleted file
343        MaterializedTreeValue::AccessDenied(err) => Err(format!("Access is denied: {err}")),
344        MaterializedTreeValue::File(file) => Ok(Some(file)),
345        MaterializedTreeValue::Symlink { .. } => Err("Is a symlink".into()),
346        MaterializedTreeValue::FileConflict(_) | MaterializedTreeValue::OtherConflict { .. } => {
347            Err("Is a conflict".into())
348        }
349        MaterializedTreeValue::GitSubmodule(_) => Err("Is a Git submodule".into()),
350        MaterializedTreeValue::Tree(_) => panic!("diff should not contain trees"),
351    }
352}
353
354#[cfg(test)]
355mod tests {
356    use maplit::hashmap;
357
358    use super::*;
359
360    #[test]
361    fn test_split_file_hunks_empty_or_single_line() {
362        let commit_id1 = &CommitId::from_hex("111111");
363
364        // unchanged
365        assert_eq!(
366            split_file_hunks(&[], &ContentDiff::by_line(["", ""])),
367            hashmap! {}
368        );
369
370        // insert single line
371        assert_eq!(
372            split_file_hunks(&[], &ContentDiff::by_line(["", "2X\n"])),
373            hashmap! {}
374        );
375        // delete single line
376        assert_eq!(
377            split_file_hunks(&[(commit_id1, 0..3)], &ContentDiff::by_line(["1a\n", ""])),
378            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
379        );
380        // modify single line
381        assert_eq!(
382            split_file_hunks(
383                &[(commit_id1, 0..3)],
384                &ContentDiff::by_line(["1a\n", "1AA\n"])
385            ),
386            hashmap! { commit_id1 => vec![(0..3, 0..4)] }
387        );
388    }
389
390    #[test]
391    fn test_split_file_hunks_single_range() {
392        let commit_id1 = &CommitId::from_hex("111111");
393
394        // insert first, middle, and last lines
395        assert_eq!(
396            split_file_hunks(
397                &[(commit_id1, 0..6)],
398                &ContentDiff::by_line(["1a\n1b\n", "1X\n1a\n1Y\n1b\n1Z\n"])
399            ),
400            hashmap! {
401                commit_id1 => vec![(0..0, 0..3), (3..3, 6..9), (6..6, 12..15)],
402            }
403        );
404        // delete first, middle, and last lines
405        assert_eq!(
406            split_file_hunks(
407                &[(commit_id1, 0..15)],
408                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n1e\n1f\n", "1b\n1d\n1f\n"])
409            ),
410            hashmap! {
411                commit_id1 => vec![(0..3, 0..0), (6..9, 3..3), (12..15, 6..6)],
412            }
413        );
414        // modify non-contiguous lines
415        assert_eq!(
416            split_file_hunks(
417                &[(commit_id1, 0..12)],
418                &ContentDiff::by_line(["1a\n1b\n1c\n1d\n", "1A\n1b\n1C\n1d\n"])
419            ),
420            hashmap! { commit_id1 => vec![(0..3, 0..3), (6..9, 6..9)] }
421        );
422    }
423
424    #[test]
425    fn test_split_file_hunks_contiguous_ranges_insert() {
426        let commit_id1 = &CommitId::from_hex("111111");
427        let commit_id2 = &CommitId::from_hex("222222");
428
429        // insert first line
430        assert_eq!(
431            split_file_hunks(
432                &[(commit_id1, 0..6), (commit_id2, 6..12)],
433                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1X\n1a\n1b\n2a\n2b\n"])
434            ),
435            hashmap! { commit_id1 => vec![(0..0, 0..3)] }
436        );
437        // insert middle line to first range
438        assert_eq!(
439            split_file_hunks(
440                &[(commit_id1, 0..6), (commit_id2, 6..12)],
441                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1X\n1b\n2a\n2b\n"])
442            ),
443            hashmap! { commit_id1 => vec![(3..3, 3..6)] }
444        );
445        // insert middle line between ranges (ambiguous)
446        assert_eq!(
447            split_file_hunks(
448                &[(commit_id1, 0..6), (commit_id2, 6..12)],
449                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n3X\n2a\n2b\n"])
450            ),
451            hashmap! {}
452        );
453        // insert middle line to second range
454        assert_eq!(
455            split_file_hunks(
456                &[(commit_id1, 0..6), (commit_id2, 6..12)],
457                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2X\n2b\n"])
458            ),
459            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
460        );
461        // insert last line
462        assert_eq!(
463            split_file_hunks(
464                &[(commit_id1, 0..6), (commit_id2, 6..12)],
465                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2b\n2X\n"])
466            ),
467            hashmap! { commit_id2 => vec![(12..12, 12..15)] }
468        );
469    }
470
471    #[test]
472    fn test_split_file_hunks_contiguous_ranges_delete() {
473        let commit_id1 = &CommitId::from_hex("111111");
474        let commit_id2 = &CommitId::from_hex("222222");
475
476        // delete first line
477        assert_eq!(
478            split_file_hunks(
479                &[(commit_id1, 0..6), (commit_id2, 6..12)],
480                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n2b\n"])
481            ),
482            hashmap! { commit_id1 => vec![(0..3, 0..0)] }
483        );
484        // delete middle line from first range
485        assert_eq!(
486            split_file_hunks(
487                &[(commit_id1, 0..6), (commit_id2, 6..12)],
488                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2a\n2b\n"])
489            ),
490            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
491        );
492        // delete middle line from second range
493        assert_eq!(
494            split_file_hunks(
495                &[(commit_id1, 0..6), (commit_id2, 6..12)],
496                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2b\n"])
497            ),
498            hashmap! { commit_id2 => vec![(6..9, 6..6)] }
499        );
500        // delete last line
501        assert_eq!(
502            split_file_hunks(
503                &[(commit_id1, 0..6), (commit_id2, 6..12)],
504                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n"])
505            ),
506            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
507        );
508        // delete first and last lines
509        assert_eq!(
510            split_file_hunks(
511                &[(commit_id1, 0..6), (commit_id2, 6..12)],
512                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1b\n2a\n"])
513            ),
514            hashmap! {
515                commit_id1 => vec![(0..3, 0..0)],
516                commit_id2 => vec![(9..12, 6..6)],
517            }
518        );
519
520        // delete across ranges (split first annotation range)
521        assert_eq!(
522            split_file_hunks(
523                &[(commit_id1, 0..6), (commit_id2, 6..12)],
524                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n"])
525            ),
526            hashmap! {
527                commit_id1 => vec![(3..6, 3..3)],
528                commit_id2 => vec![(6..12, 3..3)],
529            }
530        );
531        // delete middle lines across ranges (split both annotation ranges)
532        assert_eq!(
533            split_file_hunks(
534                &[(commit_id1, 0..6), (commit_id2, 6..12)],
535                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n2b\n"])
536            ),
537            hashmap! {
538                commit_id1 => vec![(3..6, 3..3)],
539                commit_id2 => vec![(6..9, 3..3)],
540            }
541        );
542        // delete across ranges (split second annotation range)
543        assert_eq!(
544            split_file_hunks(
545                &[(commit_id1, 0..6), (commit_id2, 6..12)],
546                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "2b\n"])
547            ),
548            hashmap! {
549                commit_id1 => vec![(0..6, 0..0)],
550                commit_id2 => vec![(6..9, 0..0)],
551            }
552        );
553
554        // delete all
555        assert_eq!(
556            split_file_hunks(
557                &[(commit_id1, 0..6), (commit_id2, 6..12)],
558                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", ""])
559            ),
560            hashmap! {
561                commit_id1 => vec![(0..6, 0..0)],
562                commit_id2 => vec![(6..12, 0..0)],
563            }
564        );
565    }
566
567    #[test]
568    fn test_split_file_hunks_contiguous_ranges_modify() {
569        let commit_id1 = &CommitId::from_hex("111111");
570        let commit_id2 = &CommitId::from_hex("222222");
571
572        // modify first line
573        assert_eq!(
574            split_file_hunks(
575                &[(commit_id1, 0..6), (commit_id2, 6..12)],
576                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2b\n"])
577            ),
578            hashmap! { commit_id1 => vec![(0..3, 0..3)] }
579        );
580        // modify middle line of first range
581        assert_eq!(
582            split_file_hunks(
583                &[(commit_id1, 0..6), (commit_id2, 6..12)],
584                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2a\n2b\n"])
585            ),
586            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
587        );
588        // modify middle lines of both ranges (ambiguous)
589        // ('hg absorb' accepts this)
590        assert_eq!(
591            split_file_hunks(
592                &[(commit_id1, 0..6), (commit_id2, 6..12)],
593                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2A\n2b\n"])
594            ),
595            hashmap! {}
596        );
597        // modify middle line of second range
598        assert_eq!(
599            split_file_hunks(
600                &[(commit_id1, 0..6), (commit_id2, 6..12)],
601                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2b\n"])
602            ),
603            hashmap! { commit_id2 => vec![(6..9, 6..9)] }
604        );
605        // modify last line
606        assert_eq!(
607            split_file_hunks(
608                &[(commit_id1, 0..6), (commit_id2, 6..12)],
609                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2a\n2B\n"])
610            ),
611            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
612        );
613        // modify first and last lines
614        assert_eq!(
615            split_file_hunks(
616                &[(commit_id1, 0..6), (commit_id2, 6..12)],
617                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2a\n2B\n"])
618            ),
619            hashmap! {
620                commit_id1 => vec![(0..3, 0..3)],
621                commit_id2 => vec![(9..12, 9..12)],
622            }
623        );
624    }
625
626    #[test]
627    fn test_split_file_hunks_contiguous_ranges_modify_insert() {
628        let commit_id1 = &CommitId::from_hex("111111");
629        let commit_id2 = &CommitId::from_hex("222222");
630
631        // modify first range, insert adjacent middle line
632        assert_eq!(
633            split_file_hunks(
634                &[(commit_id1, 0..6), (commit_id2, 6..12)],
635                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1B\n1X\n2a\n2b\n"])
636            ),
637            hashmap! { commit_id1 => vec![(0..6, 0..9)] }
638        );
639        // modify second range, insert adjacent middle line
640        assert_eq!(
641            split_file_hunks(
642                &[(commit_id1, 0..6), (commit_id2, 6..12)],
643                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2X\n2A\n2B\n"])
644            ),
645            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
646        );
647        // modify second range, insert last line
648        assert_eq!(
649            split_file_hunks(
650                &[(commit_id1, 0..6), (commit_id2, 6..12)],
651                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2A\n2B\n2X\n"])
652            ),
653            hashmap! { commit_id2 => vec![(6..12, 6..15)] }
654        );
655        // modify first and last lines (unambiguous), insert middle line between
656        // ranges (ambiguous)
657        assert_eq!(
658            split_file_hunks(
659                &[(commit_id1, 0..6), (commit_id2, 6..12)],
660                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n3X\n2a\n2B\n"])
661            ),
662            hashmap! {
663                commit_id1 => vec![(0..3, 0..3)],
664                commit_id2 => vec![(9..12, 12..15)],
665            }
666        );
667    }
668
669    #[test]
670    fn test_split_file_hunks_contiguous_ranges_modify_delete() {
671        let commit_id1 = &CommitId::from_hex("111111");
672        let commit_id2 = &CommitId::from_hex("222222");
673
674        // modify first line, delete adjacent middle line
675        assert_eq!(
676            split_file_hunks(
677                &[(commit_id1, 0..6), (commit_id2, 6..12)],
678                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2b\n"])
679            ),
680            hashmap! { commit_id1 => vec![(0..6, 0..3)] }
681        );
682        // modify last line, delete adjacent middle line
683        assert_eq!(
684            split_file_hunks(
685                &[(commit_id1, 0..6), (commit_id2, 6..12)],
686                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1b\n2B\n"])
687            ),
688            hashmap! { commit_id2 => vec![(6..12, 6..9)] }
689        );
690        // modify first and last lines, delete middle line from first range
691        assert_eq!(
692            split_file_hunks(
693                &[(commit_id1, 0..6), (commit_id2, 6..12)],
694                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n2a\n2B\n"])
695            ),
696            hashmap! {
697                commit_id1 => vec![(0..6, 0..3)],
698                commit_id2 => vec![(9..12, 6..9)],
699            }
700        );
701        // modify first and last lines, delete middle line from second range
702        assert_eq!(
703            split_file_hunks(
704                &[(commit_id1, 0..6), (commit_id2, 6..12)],
705                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1A\n1b\n2B\n"])
706            ),
707            hashmap! {
708                commit_id1 => vec![(0..3, 0..3)],
709                commit_id2 => vec![(6..12, 6..9)],
710            }
711        );
712        // modify middle line, delete adjacent middle line (ambiguous)
713        assert_eq!(
714            split_file_hunks(
715                &[(commit_id1, 0..6), (commit_id2, 6..12)],
716                &ContentDiff::by_line(["1a\n1b\n2a\n2b\n", "1a\n1B\n2b\n"])
717            ),
718            hashmap! {}
719        );
720    }
721
722    #[test]
723    fn test_split_file_hunks_non_contiguous_ranges_insert() {
724        let commit_id1 = &CommitId::from_hex("111111");
725        let commit_id2 = &CommitId::from_hex("222222");
726
727        // insert middle line to first range
728        assert_eq!(
729            split_file_hunks(
730                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
731                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2a\n2b\n"])
732            ),
733            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
734        );
735        // insert middle line to second range
736        assert_eq!(
737            split_file_hunks(
738                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
739                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2X\n2a\n2b\n"])
740            ),
741            hashmap! { commit_id2 => vec![(9..9, 9..12)] }
742        );
743        // insert middle lines to both ranges
744        assert_eq!(
745            split_file_hunks(
746                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
747                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0a\n2X\n2a\n2b\n"])
748            ),
749            hashmap! {
750                commit_id1 => vec![(6..6, 6..9)],
751                commit_id2 => vec![(9..9, 12..15)],
752            }
753        );
754    }
755
756    #[test]
757    fn test_split_file_hunks_non_contiguous_ranges_insert_modify_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b") and
        // commit 2 for bytes 9..15 ("2a", "2b"). Bytes 6..9 ("0a") are left
        // out of the annotation (the commented-out `6..9`), i.e. masked.
        // Each case inserts a line next to an annotated range and also
        // rewrites the masked line to "0A"; an empty map is expected.
758        let commit_id1 = &CommitId::from_hex("111111");
759        let commit_id2 = &CommitId::from_hex("222222");
760
761        // insert middle line to first range, modify masked line (ambiguous)
762        assert_eq!(
763            split_file_hunks(
764                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
765                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2a\n2b\n"])
766            ),
767            hashmap! {}
768        );
769        // insert middle line to second range, modify masked line (ambiguous)
770        assert_eq!(
771            split_file_hunks(
772                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
773                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2X\n2a\n2b\n"])
774            ),
775            hashmap! {}
776        );
777        // insert middle lines to both ranges, modify masked line (ambiguous)
778        assert_eq!(
779            split_file_hunks(
780                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
781                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n1X\n0A\n2X\n2a\n2b\n"])
782            ),
783            hashmap! {}
784        );
785    }
786
787    #[test]
788    fn test_split_file_hunks_non_contiguous_ranges_delete() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b") and
        // commit 2 for bytes 9..15 ("2a", "2b"); bytes 6..9 ("0a") are masked.
        // Each case removes a line from an annotated range and leaves the
        // masked line untouched. The expected map pairs the removed range of
        // the first text with an empty range of the second text, keyed by the
        // commit annotated for the removed bytes.
789        let commit_id1 = &CommitId::from_hex("111111");
790        let commit_id2 = &CommitId::from_hex("222222");
791
792        // delete middle line from first range
793        assert_eq!(
794            split_file_hunks(
795                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
796                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2a\n2b\n"])
797            ),
798            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
799        );
800        // delete middle line from second range
801        assert_eq!(
802            split_file_hunks(
803                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
804                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2b\n"])
805            ),
806            hashmap! { commit_id2 => vec![(9..12, 9..9)] }
807        );
808        // delete middle lines from both ranges
809        assert_eq!(
810            split_file_hunks(
811                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
812                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0a\n2b\n"])
813            ),
814            hashmap! {
815                commit_id1 => vec![(3..6, 3..3)],
                // The second-text offset is 6 rather than 9 because "1b" is
                // already missing from the second text at that point.
816                commit_id2 => vec![(9..12, 6..6)],
817            }
818        );
819    }
820
821    #[test]
822    fn test_split_file_hunks_non_contiguous_ranges_delete_modify_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b") and
        // commit 2 for bytes 9..15 ("2a", "2b"); bytes 6..9 ("0a") are masked.
        // Each case removes a line from an annotated range and additionally
        // rewrites the masked line to "0A"; an empty map is expected.
823        let commit_id1 = &CommitId::from_hex("111111");
824        let commit_id2 = &CommitId::from_hex("222222");
825
826        // delete middle line from first range, modify masked line (ambiguous)
827        assert_eq!(
828            split_file_hunks(
829                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
830                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2a\n2b\n"])
831            ),
832            hashmap! {}
833        );
834        // delete middle line from second range, modify masked line (ambiguous)
835        assert_eq!(
836            split_file_hunks(
837                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
838                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2b\n"])
839            ),
840            hashmap! {}
841        );
842        // delete middle lines from both ranges, modify masked line (ambiguous)
843        assert_eq!(
844            split_file_hunks(
845                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
846                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n0A\n2b\n"])
847            ),
848            hashmap! {}
849        );
850    }
851
852    #[test]
853    fn test_split_file_hunks_non_contiguous_ranges_delete_delete_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b") and
        // commit 2 for bytes 9..15 ("2a", "2b"); bytes 6..9 ("0a") are masked.
        // Each case removes the masked line together with an adjacent
        // annotated line; an empty map is expected.
854        let commit_id1 = &CommitId::from_hex("111111");
855        let commit_id2 = &CommitId::from_hex("222222");
856
857        // 'hg absorb' accepts these, but it seems better to reject them as
858        // ambiguous. Masked lines cannot be deleted.
859
860        // delete middle line from first range, delete masked line (ambiguous)
861        assert_eq!(
862            split_file_hunks(
863                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
864                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2a\n2b\n"])
865            ),
866            hashmap! {}
867        );
868        // delete middle line from second range, delete masked line (ambiguous)
869        assert_eq!(
870            split_file_hunks(
871                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
872                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n2b\n"])
873            ),
874            hashmap! {}
875        );
876        // delete middle lines from both ranges, delete masked line (ambiguous)
877        assert_eq!(
878            split_file_hunks(
879                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
880                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n2b\n"])
881            ),
882            hashmap! {}
883        );
884    }
885
886    #[test]
887    fn test_split_file_hunks_non_contiguous_ranges_modify() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b") and
        // commit 2 for bytes 9..15 ("2a", "2b"); bytes 6..9 ("0a") are masked.
        // Each case rewrites one annotated line in place (same length) and
        // leaves the masked line untouched, so the expected ranges are the
        // same in the first and the second text.
888        let commit_id1 = &CommitId::from_hex("111111");
889        let commit_id2 = &CommitId::from_hex("222222");
890
891        // modify middle line of first range
892        assert_eq!(
893            split_file_hunks(
894                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
895                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2a\n2b\n"])
896            ),
897            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
898        );
899        // modify middle line of second range
900        assert_eq!(
901            split_file_hunks(
902                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
903                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0a\n2A\n2b\n"])
904            ),
905            hashmap! { commit_id2 => vec![(9..12, 9..12)] }
906        );
907        // modify middle lines of both ranges
908        assert_eq!(
909            split_file_hunks(
910                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
911                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0a\n2A\n2b\n"])
912            ),
913            hashmap! {
914                commit_id1 => vec![(3..6, 3..6)],
915                commit_id2 => vec![(9..12, 9..12)],
916            }
917        );
918    }
919
920    #[test]
921    fn test_split_file_hunks_non_contiguous_ranges_modify_modify_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b") and
        // commit 2 for bytes 9..15 ("2a", "2b"); bytes 6..9 ("0a") are masked.
        // Each case rewrites an annotated line that sits next to the masked
        // line and also rewrites the masked line to "0A"; an empty map is
        // expected.
922        let commit_id1 = &CommitId::from_hex("111111");
923        let commit_id2 = &CommitId::from_hex("222222");
924
925        // modify middle line of first range, modify masked line (ambiguous)
926        assert_eq!(
927            split_file_hunks(
928                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
929                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2a\n2b\n"])
930            ),
931            hashmap! {}
932        );
933        // modify middle line of second range, modify masked line (ambiguous)
934        assert_eq!(
935            split_file_hunks(
936                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
937                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1b\n0A\n2A\n2b\n"])
938            ),
939            hashmap! {}
940        );
941        // modify middle lines of both ranges, modify masked line (ambiguous)
942        assert_eq!(
943            split_file_hunks(
944                &[(commit_id1, 0..6), /* 6..9, */ (commit_id2, 9..15)],
945                &ContentDiff::by_line(["1a\n1b\n0a\n2a\n2b\n", "1a\n1B\n0A\n2A\n2b\n"])
946            ),
947            hashmap! {}
948        );
949    }
950
951    #[test]
952    fn test_split_file_hunks_non_contiguous_tail_range_insert() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b"); the
        // trailing bytes 6..9 ("0a") are not annotated (the commented-out
        // `6..9`), i.e. the masked line is at the end of the text.
        // A line inserted between the annotated range and the masked tail is
        // expected to be assigned to commit 1 as an empty first-text range
        // paired with the inserted bytes of the second text.
953        let commit_id1 = &CommitId::from_hex("111111");
954
955        // insert middle line to range
956        assert_eq!(
957            split_file_hunks(
958                &[(commit_id1, 0..6) /* , 6..9 */],
959                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0a\n"])
960            ),
961            hashmap! { commit_id1 => vec![(6..6, 6..9)] }
962        );
963    }
964
965    #[test]
966    fn test_split_file_hunks_non_contiguous_tail_range_insert_modify_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b"); the
        // trailing bytes 6..9 ("0a") are masked.
        // The insertion is directly followed by a rewrite of the masked line
        // ("0a" => "0A"); an empty map is expected.
967        let commit_id1 = &CommitId::from_hex("111111");
968
969        // insert middle line to range, modify masked line (ambiguous)
970        assert_eq!(
971            split_file_hunks(
972                &[(commit_id1, 0..6) /* , 6..9 */],
973                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1b\n1X\n0A\n"])
974            ),
975            hashmap! {}
976        );
977    }
978
979    #[test]
980    fn test_split_file_hunks_non_contiguous_tail_range_delete() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b"); the
        // trailing bytes 6..9 ("0a") are masked.
        // Deleting annotated lines while the masked tail stays unchanged is
        // expected to be assigned to commit 1, with an empty range on the
        // second-text side.
981        let commit_id1 = &CommitId::from_hex("111111");
982
983        // delete middle line from range
984        assert_eq!(
985            split_file_hunks(
986                &[(commit_id1, 0..6) /* , 6..9 */],
987                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0a\n"])
988            ),
989            hashmap! { commit_id1 => vec![(3..6, 3..3)] }
990        );
991        // delete all lines from range
992        assert_eq!(
993            split_file_hunks(
994                &[(commit_id1, 0..6) /* , 6..9 */],
995                &ContentDiff::by_line(["1a\n1b\n0a\n", "0a\n"])
996            ),
997            hashmap! { commit_id1 => vec![(0..6, 0..0)] }
998        );
999    }
1000
1001    #[test]
1002    fn test_split_file_hunks_non_contiguous_tail_range_delete_modify_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b"); the
        // trailing bytes 6..9 ("0a") are masked.
        // Each deletion is combined with a rewrite of the masked line
        // ("0a" => "0A"); an empty map is expected.
1003        let commit_id1 = &CommitId::from_hex("111111");
1004
1005        // delete middle line from range, modify masked line (ambiguous)
1006        assert_eq!(
1007            split_file_hunks(
1008                &[(commit_id1, 0..6) /* , 6..9 */],
1009                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n0A\n"])
1010            ),
1011            hashmap! {}
1012        );
1013        // delete all lines from range, modify masked line (ambiguous)
1014        assert_eq!(
1015            split_file_hunks(
1016                &[(commit_id1, 0..6) /* , 6..9 */],
1017                &ContentDiff::by_line(["1a\n1b\n0a\n", "0A\n"])
1018            ),
1019            hashmap! {}
1020        );
1021    }
1022
1023    #[test]
1024    fn test_split_file_hunks_non_contiguous_tail_range_delete_delete_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b"); the
        // trailing bytes 6..9 ("0a") are masked.
        // Each case removes the masked tail line together with annotated
        // lines; an empty map is expected.
1025        let commit_id1 = &CommitId::from_hex("111111");
1026
1027        // 'hg absorb' accepts these, but it seems better to reject them as
1028        // ambiguous. Masked lines cannot be deleted.
1029
1030        // delete middle line from range, delete masked line (ambiguous)
1031        assert_eq!(
1032            split_file_hunks(
1033                &[(commit_id1, 0..6) /* , 6..9 */],
1034                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n"])
1035            ),
1036            hashmap! {}
1037        );
1038        // delete all lines from range, delete masked line (ambiguous)
1039        assert_eq!(
1040            split_file_hunks(
1041                &[(commit_id1, 0..6) /* , 6..9 */],
1042                &ContentDiff::by_line(["1a\n1b\n0a\n", ""])
1043            ),
1044            hashmap! {}
1045        );
1046    }
1047
1048    #[test]
1049    fn test_split_file_hunks_non_contiguous_tail_range_modify() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b"); the
        // trailing bytes 6..9 ("0a") are masked.
        // An in-place rewrite of an annotated line ("1b" => "1B") with the
        // masked tail unchanged is expected to be assigned to commit 1.
1050        let commit_id1 = &CommitId::from_hex("111111");
1051
1052        // modify middle line of range
1053        assert_eq!(
1054            split_file_hunks(
1055                &[(commit_id1, 0..6) /* , 6..9 */],
1056                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0a\n"])
1057            ),
1058            hashmap! { commit_id1 => vec![(3..6, 3..6)] }
1059        );
1060    }
1061
1062    #[test]
1063    fn test_split_file_hunks_non_contiguous_tail_range_modify_modify_masked() {
        // Fixture: commit 1 is annotated for bytes 0..6 ("1a", "1b"); the
        // trailing bytes 6..9 ("0a") are masked.
        // The rewritten annotated line ("1b" => "1B") is adjacent to the
        // rewritten masked line ("0a" => "0A"); an empty map is expected.
1064        let commit_id1 = &CommitId::from_hex("111111");
1065
1066        // modify middle line of range, modify masked line (ambiguous)
1067        assert_eq!(
1068            split_file_hunks(
1069                &[(commit_id1, 0..6) /* , 6..9 */],
1070                &ContentDiff::by_line(["1a\n1b\n0a\n", "1a\n1B\n0A\n"])
1071            ),
1072            hashmap! {}
1073        );
1074    }
1075
1076    #[test]
1077    fn test_split_file_hunks_multiple_edits() {
        // Fully annotated text with several independent edits. Commit 1 owns
        // two separate ranges (0..3 and 6..15), so its two hunks are expected
        // to be collected under the same key. Commit 2's line is unchanged,
        // so it is expected to be absent from the result.
        // Each expected tuple is (range in first text, range in second text);
        // the values are consistent with byte offsets into the two inputs.
1078        let commit_id1 = &CommitId::from_hex("111111");
1079        let commit_id2 = &CommitId::from_hex("222222");
1080        let commit_id3 = &CommitId::from_hex("333333");
1081
1082        assert_eq!(
1083            split_file_hunks(
1084                &[
1085                    (commit_id1, 0..3),   // 1a       => 1A
1086                    (commit_id2, 3..6),   // 2a       => 2a
1087                    (commit_id1, 6..15),  // 1b 1c 1d => 1B 1d
1088                    (commit_id3, 15..21), // 3a 3b    => 3X 3A 3b 3Y
1089                ],
1090                &ContentDiff::by_line([
1091                    "1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1092                    "1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n"
1093                ])
1094            ),
1095            hashmap! {
1096                commit_id1 => vec![(0..3, 0..3), (6..12, 6..9)],
1097                commit_id3 => vec![(15..18, 12..18), (21..21, 21..24)],
1098            }
1099        );
1100    }
1101
1102    #[test]
1103    fn test_combine_texts() {
        // Pins the observable contract of `combine_texts(first, second, ranges)`
        // where each range pair is (range in first, range in second):
        // - no range pairs: the first text is expected back unchanged;
        // - a pair covering both texts entirely: the second text is expected;
        // - otherwise only the listed regions of the first text are expected
        //   to be replaced by the paired regions of the second text.
1104        assert_eq!(combine_texts(b"", b"", &[]), "");
1105        assert_eq!(combine_texts(b"foo", b"bar", &[]), "foo");
1106        assert_eq!(combine_texts(b"foo", b"bar", &[(0..3, 0..3)]), "bar");
1107
        // Same texts and range pairs as in test_split_file_hunks_multiple_edits:
        // first the hunks expected for commit 1, then those for commit 3.
1108        assert_eq!(
1109            combine_texts(
1110                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1111                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1112                &[(0..3, 0..3), (6..12, 6..9)]
1113            ),
1114            "1A\n2a\n1B\n1d\n3a\n3b\n"
1115        );
1116        assert_eq!(
1117            combine_texts(
1118                b"1a\n2a\n1b\n1c\n1d\n3a\n3b\n",
1119                b"1A\n2a\n1B\n1d\n3X\n3A\n3b\n3Y\n",
1120                &[(15..18, 12..18), (21..21, 21..24)]
1121            ),
1122            "1a\n2a\n1b\n1c\n1d\n3X\n3A\n3b\n3Y\n"
1123        );
1124    }
1125}