Skip to main content

sley_diff_merge/
range.rs

1//! Patch-series assignment for `range-diff`.
2//!
3//! The engine is deliberately repository-agnostic: callers render each commit
4//! into normalized patch text and pass borrowed subjects/diff bodies here.
5
6use std::collections::{HashMap, HashSet};
7
8const COST_MAX: i32 = i32::MAX / 4;
9type AssignmentMemo = HashMap<(usize, u64), i32>;
10
11/// Borrowed patch data needed to assign two range-diff series.
12#[derive(Clone, Copy, Debug)]
13pub struct PatchRef<'a> {
14    pub subject: &'a str,
15    pub diff: &'a [u8],
16    pub diff_size: i32,
17}
18
19struct AssignmentCtx<'a> {
20    left_ids: &'a [usize],
21    right_ids: &'a [usize],
22    left: &'a [PatchRef<'a>],
23    right: &'a [PatchRef<'a>],
24    creation_factor: i32,
25    match_costs: Vec<Vec<i32>>,
26}
27
28/// Assign corresponding patches between two patch series.
29///
30/// Returns `(left_index, right_index)` pairs for exact matches and the best
31/// inexact assignments selected by the range-diff cost model.
32pub fn assign_patch_series(
33    left: &[PatchRef<'_>],
34    right: &[PatchRef<'_>],
35    creation_factor: i32,
36) -> Vec<(usize, usize)> {
37    let mut pairs = find_exact_matches(left, right);
38    let matched_left = pairs.iter().map(|(li, _)| *li).collect::<HashSet<_>>();
39    let matched_right = pairs.iter().map(|(_, rj)| *rj).collect::<HashSet<_>>();
40    let left_unmatched = (0..left.len())
41        .filter(|idx| !matched_left.contains(idx))
42        .collect::<Vec<_>>();
43    let right_unmatched = (0..right.len())
44        .filter(|idx| !matched_right.contains(idx))
45        .collect::<Vec<_>>();
46    if left_unmatched.is_empty() || right_unmatched.len() > 62 {
47        return pairs;
48    }
49
50    let match_costs = precompute_match_costs(left, right, &left_unmatched, &right_unmatched);
51    let ctx = AssignmentCtx {
52        left_ids: &left_unmatched,
53        right_ids: &right_unmatched,
54        left,
55        right,
56        creation_factor,
57        match_costs,
58    };
59    let mut memo = HashMap::new();
60    assignment_dp(0, 0, &ctx, &mut memo);
61    backtrack_assignment(0, 0, &ctx, &mut memo, &mut pairs);
62    pairs
63}
64
65fn find_exact_matches(left: &[PatchRef<'_>], right: &[PatchRef<'_>]) -> Vec<(usize, usize)> {
66    let mut pairs = Vec::new();
67    let mut used = HashSet::new();
68    for (i, left_patch) in left.iter().enumerate() {
69        for (j, right_patch) in right.iter().enumerate() {
70            if used.contains(&j) {
71                continue;
72            }
73            if left_patch.diff == right_patch.diff {
74                pairs.push((i, j));
75                used.insert(j);
76                break;
77            }
78        }
79    }
80    pairs
81}
82
83fn precompute_match_costs(
84    left: &[PatchRef<'_>],
85    right: &[PatchRef<'_>],
86    left_ids: &[usize],
87    right_ids: &[usize],
88) -> Vec<Vec<i32>> {
89    let mut rendered = Vec::new();
90    left_ids
91        .iter()
92        .map(|&li| {
93            right_ids
94                .iter()
95                .map(|&rj| {
96                    let cost = diff_size_into(&mut rendered, left[li].diff, right[rj].diff);
97                    if left[li].subject == right[rj].subject {
98                        cost / 2
99                    } else {
100                        cost
101                    }
102                })
103                .collect()
104        })
105        .collect()
106}
107
108fn assignment_dp(pos: usize, used: u64, ctx: &AssignmentCtx<'_>, memo: &mut AssignmentMemo) -> i32 {
109    if let Some(value) = memo.get(&(pos, used)) {
110        return *value;
111    }
112    let cost = if pos == ctx.left_ids.len() {
113        let mut cost = 0;
114        for (bit, &rid) in ctx.right_ids.iter().enumerate() {
115            if used & (1u64 << bit) == 0 {
116                cost = saturating_cost(cost, ctx.right[rid].diff_size * ctx.creation_factor / 100);
117            }
118        }
119        cost
120    } else {
121        let li = ctx.left_ids[pos];
122        let delete_cost = ctx.left[li].diff_size * ctx.creation_factor / 100;
123        let tail_cost = assignment_dp(pos + 1, used, ctx, memo);
124        let mut best = saturating_cost(delete_cost, tail_cost);
125
126        for bit in 0..ctx.right_ids.len() {
127            if used & (1u64 << bit) != 0 {
128                continue;
129            }
130            let tail_cost = assignment_dp(pos + 1, used | (1u64 << bit), ctx, memo);
131            let cost = saturating_cost(ctx.match_costs[pos][bit], tail_cost);
132            if cost < best {
133                best = cost;
134            }
135        }
136        best
137    };
138    memo.insert((pos, used), cost);
139    cost
140}
141
142fn backtrack_assignment(
143    pos: usize,
144    used: u64,
145    ctx: &AssignmentCtx<'_>,
146    memo: &mut AssignmentMemo,
147    pairs: &mut Vec<(usize, usize)>,
148) {
149    if pos == ctx.left_ids.len() {
150        return;
151    }
152    let li = ctx.left_ids[pos];
153    let delete_cost = ctx.left[li].diff_size * ctx.creation_factor / 100;
154    let tail_cost = assignment_dp(pos + 1, used, ctx, memo);
155    let mut best = saturating_cost(delete_cost, tail_cost);
156    let mut best_match = None;
157
158    for (bit, &rj) in ctx.right_ids.iter().enumerate() {
159        if used & (1u64 << bit) != 0 {
160            continue;
161        }
162        let tail_cost = assignment_dp(pos + 1, used | (1u64 << bit), ctx, memo);
163        let cost = saturating_cost(ctx.match_costs[pos][bit], tail_cost);
164        if cost < best {
165            best = cost;
166            best_match = Some((bit, rj));
167        }
168    }
169
170    if let Some((bit, rj)) = best_match {
171        backtrack_assignment(pos + 1, used | (1u64 << bit), ctx, memo, pairs);
172        pairs.push((li, rj));
173    } else {
174        backtrack_assignment(pos + 1, used, ctx, memo, pairs);
175    }
176}
177
178fn saturating_cost(a: i32, b: i32) -> i32 {
179    a.saturating_add(b).min(COST_MAX)
180}
181
182fn diff_size_into(rendered: &mut Vec<u8>, left: &[u8], right: &[u8]) -> i32 {
183    rendered.clear();
184    let mut heading = section_heading_classifier();
185    let mut opts = crate::render::HunkRenderOptions {
186        context: 3,
187        heading: Some(&mut heading),
188        ..Default::default()
189    };
190    crate::render::render_hunks(rendered, Some(left), Some(right), &mut opts);
191    rendered.iter().filter(|b| **b == b'\n').count() as i32
192}
193
194fn section_heading_classifier() -> impl FnMut(&[u8]) -> Option<Vec<u8>> {
195    move |line: &[u8]| {
196        let line = trim_ascii_end(line);
197        if line.starts_with(b" ## ") && line.ends_with(b" ##") {
198            return Some(line[4..line.len() - 3].to_vec());
199        }
200        let line = line.strip_prefix(b" ").unwrap_or(line);
201        if let Some(rest) = line.strip_prefix(b"@@ ") {
202            return Some(rest.to_vec());
203        }
204        None
205    }
206}
207
208fn trim_ascii_end(line: &[u8]) -> &[u8] {
209    let mut end = line.len();
210    while end > 0 && line[end - 1].is_ascii_whitespace() {
211        end -= 1;
212    }
213    &line[..end]
214}
215
216#[cfg(test)]
217mod tests {
218    use super::*;
219
220    fn patch<'a>(subject: &'a str, diff: &'a [u8]) -> PatchRef<'a> {
221        PatchRef {
222            subject,
223            diff,
224            diff_size: diff.iter().filter(|b| **b == b'\n').count() as i32,
225        }
226    }
227
228    #[test]
229    fn exact_matches_are_greedy_and_do_not_reuse_rhs() {
230        let left = [patch("one", b"same\n"), patch("two", b"same\n")];
231        let right = [patch("right one", b"same\n"), patch("right two", b"same\n")];
232
233        assert_eq!(assign_patch_series(&left, &right, 60), vec![(0, 0), (1, 1)]);
234    }
235
236    #[test]
237    fn inexact_assignment_prefers_lowest_patch_diff_cost() {
238        let left = [patch("topic", b" ## file ##\n-old\n+new\n")];
239        let right = [
240            patch("other", b" ## file ##\n-completely\n+different\n"),
241            patch("topic", b" ## file ##\n-old\n+newer\n"),
242        ];
243
244        assert_eq!(assign_patch_series(&left, &right, 1_000), vec![(0, 1)]);
245    }
246
247    #[test]
248    fn too_many_rhs_candidates_keeps_only_exact_matches() {
249        let left = [patch("exact", b"same\n"), patch("unmatched", b"left\n")];
250        let mut right = vec![patch("exact", b"same\n")];
251        right.extend((0..63).map(|_| patch("candidate", b"right\n")));
252
253        assert_eq!(assign_patch_series(&left, &right, 60), vec![(0, 0)]);
254    }
255}