1use std::collections::{HashMap, HashSet};
7
8const COST_MAX: i32 = i32::MAX / 4;
9type AssignmentMemo = HashMap<(usize, u64), i32>;
10
11#[derive(Clone, Copy, Debug)]
13pub struct PatchRef<'a> {
14 pub subject: &'a str,
15 pub diff: &'a [u8],
16 pub diff_size: i32,
17}
18
19struct AssignmentCtx<'a> {
20 left_ids: &'a [usize],
21 right_ids: &'a [usize],
22 left: &'a [PatchRef<'a>],
23 right: &'a [PatchRef<'a>],
24 creation_factor: i32,
25 match_costs: Vec<Vec<i32>>,
26}
27
28pub fn assign_patch_series(
33 left: &[PatchRef<'_>],
34 right: &[PatchRef<'_>],
35 creation_factor: i32,
36) -> Vec<(usize, usize)> {
37 let mut pairs = find_exact_matches(left, right);
38 let matched_left = pairs.iter().map(|(li, _)| *li).collect::<HashSet<_>>();
39 let matched_right = pairs.iter().map(|(_, rj)| *rj).collect::<HashSet<_>>();
40 let left_unmatched = (0..left.len())
41 .filter(|idx| !matched_left.contains(idx))
42 .collect::<Vec<_>>();
43 let right_unmatched = (0..right.len())
44 .filter(|idx| !matched_right.contains(idx))
45 .collect::<Vec<_>>();
46 if left_unmatched.is_empty() || right_unmatched.len() > 62 {
47 return pairs;
48 }
49
50 let match_costs = precompute_match_costs(left, right, &left_unmatched, &right_unmatched);
51 let ctx = AssignmentCtx {
52 left_ids: &left_unmatched,
53 right_ids: &right_unmatched,
54 left,
55 right,
56 creation_factor,
57 match_costs,
58 };
59 let mut memo = HashMap::new();
60 assignment_dp(0, 0, &ctx, &mut memo);
61 backtrack_assignment(0, 0, &ctx, &mut memo, &mut pairs);
62 pairs
63}
64
65fn find_exact_matches(left: &[PatchRef<'_>], right: &[PatchRef<'_>]) -> Vec<(usize, usize)> {
66 let mut pairs = Vec::new();
67 let mut used = HashSet::new();
68 for (i, left_patch) in left.iter().enumerate() {
69 for (j, right_patch) in right.iter().enumerate() {
70 if used.contains(&j) {
71 continue;
72 }
73 if left_patch.diff == right_patch.diff {
74 pairs.push((i, j));
75 used.insert(j);
76 break;
77 }
78 }
79 }
80 pairs
81}
82
83fn precompute_match_costs(
84 left: &[PatchRef<'_>],
85 right: &[PatchRef<'_>],
86 left_ids: &[usize],
87 right_ids: &[usize],
88) -> Vec<Vec<i32>> {
89 let mut rendered = Vec::new();
90 left_ids
91 .iter()
92 .map(|&li| {
93 right_ids
94 .iter()
95 .map(|&rj| {
96 let cost = diff_size_into(&mut rendered, left[li].diff, right[rj].diff);
97 if left[li].subject == right[rj].subject {
98 cost / 2
99 } else {
100 cost
101 }
102 })
103 .collect()
104 })
105 .collect()
106}
107
108fn assignment_dp(pos: usize, used: u64, ctx: &AssignmentCtx<'_>, memo: &mut AssignmentMemo) -> i32 {
109 if let Some(value) = memo.get(&(pos, used)) {
110 return *value;
111 }
112 let cost = if pos == ctx.left_ids.len() {
113 let mut cost = 0;
114 for (bit, &rid) in ctx.right_ids.iter().enumerate() {
115 if used & (1u64 << bit) == 0 {
116 cost = saturating_cost(cost, ctx.right[rid].diff_size * ctx.creation_factor / 100);
117 }
118 }
119 cost
120 } else {
121 let li = ctx.left_ids[pos];
122 let delete_cost = ctx.left[li].diff_size * ctx.creation_factor / 100;
123 let tail_cost = assignment_dp(pos + 1, used, ctx, memo);
124 let mut best = saturating_cost(delete_cost, tail_cost);
125
126 for bit in 0..ctx.right_ids.len() {
127 if used & (1u64 << bit) != 0 {
128 continue;
129 }
130 let tail_cost = assignment_dp(pos + 1, used | (1u64 << bit), ctx, memo);
131 let cost = saturating_cost(ctx.match_costs[pos][bit], tail_cost);
132 if cost < best {
133 best = cost;
134 }
135 }
136 best
137 };
138 memo.insert((pos, used), cost);
139 cost
140}
141
142fn backtrack_assignment(
143 pos: usize,
144 used: u64,
145 ctx: &AssignmentCtx<'_>,
146 memo: &mut AssignmentMemo,
147 pairs: &mut Vec<(usize, usize)>,
148) {
149 if pos == ctx.left_ids.len() {
150 return;
151 }
152 let li = ctx.left_ids[pos];
153 let delete_cost = ctx.left[li].diff_size * ctx.creation_factor / 100;
154 let tail_cost = assignment_dp(pos + 1, used, ctx, memo);
155 let mut best = saturating_cost(delete_cost, tail_cost);
156 let mut best_match = None;
157
158 for (bit, &rj) in ctx.right_ids.iter().enumerate() {
159 if used & (1u64 << bit) != 0 {
160 continue;
161 }
162 let tail_cost = assignment_dp(pos + 1, used | (1u64 << bit), ctx, memo);
163 let cost = saturating_cost(ctx.match_costs[pos][bit], tail_cost);
164 if cost < best {
165 best = cost;
166 best_match = Some((bit, rj));
167 }
168 }
169
170 if let Some((bit, rj)) = best_match {
171 backtrack_assignment(pos + 1, used | (1u64 << bit), ctx, memo, pairs);
172 pairs.push((li, rj));
173 } else {
174 backtrack_assignment(pos + 1, used, ctx, memo, pairs);
175 }
176}
177
178fn saturating_cost(a: i32, b: i32) -> i32 {
179 a.saturating_add(b).min(COST_MAX)
180}
181
182fn diff_size_into(rendered: &mut Vec<u8>, left: &[u8], right: &[u8]) -> i32 {
183 rendered.clear();
184 let mut heading = section_heading_classifier();
185 let mut opts = crate::render::HunkRenderOptions {
186 context: 3,
187 heading: Some(&mut heading),
188 ..Default::default()
189 };
190 crate::render::render_hunks(rendered, Some(left), Some(right), &mut opts);
191 rendered.iter().filter(|b| **b == b'\n').count() as i32
192}
193
194fn section_heading_classifier() -> impl FnMut(&[u8]) -> Option<Vec<u8>> {
195 move |line: &[u8]| {
196 let line = trim_ascii_end(line);
197 if line.starts_with(b" ## ") && line.ends_with(b" ##") {
198 return Some(line[4..line.len() - 3].to_vec());
199 }
200 let line = line.strip_prefix(b" ").unwrap_or(line);
201 if let Some(rest) = line.strip_prefix(b"@@ ") {
202 return Some(rest.to_vec());
203 }
204 None
205 }
206}
207
208fn trim_ascii_end(line: &[u8]) -> &[u8] {
209 let mut end = line.len();
210 while end > 0 && line[end - 1].is_ascii_whitespace() {
211 end -= 1;
212 }
213 &line[..end]
214}
215
216#[cfg(test)]
217mod tests {
218 use super::*;
219
220 fn patch<'a>(subject: &'a str, diff: &'a [u8]) -> PatchRef<'a> {
221 PatchRef {
222 subject,
223 diff,
224 diff_size: diff.iter().filter(|b| **b == b'\n').count() as i32,
225 }
226 }
227
228 #[test]
229 fn exact_matches_are_greedy_and_do_not_reuse_rhs() {
230 let left = [patch("one", b"same\n"), patch("two", b"same\n")];
231 let right = [patch("right one", b"same\n"), patch("right two", b"same\n")];
232
233 assert_eq!(assign_patch_series(&left, &right, 60), vec![(0, 0), (1, 1)]);
234 }
235
236 #[test]
237 fn inexact_assignment_prefers_lowest_patch_diff_cost() {
238 let left = [patch("topic", b" ## file ##\n-old\n+new\n")];
239 let right = [
240 patch("other", b" ## file ##\n-completely\n+different\n"),
241 patch("topic", b" ## file ##\n-old\n+newer\n"),
242 ];
243
244 assert_eq!(assign_patch_series(&left, &right, 1_000), vec![(0, 1)]);
245 }
246
247 #[test]
248 fn too_many_rhs_candidates_keeps_only_exact_matches() {
249 let left = [patch("exact", b"same\n"), patch("unmatched", b"left\n")];
250 let mut right = vec![patch("exact", b"same\n")];
251 right.extend((0..63).map(|_| patch("candidate", b"right\n")));
252
253 assert_eq!(assign_patch_series(&left, &right, 60), vec![(0, 0)]);
254 }
255}