1use diff::slice as diff_slice;
16use std::fmt::{Debug, Error, Formatter};
17
18fn is_word_byte(a: u8) -> bool {
19 a.is_ascii_alphanumeric() || a == b'_'
20}
21
22fn is_same_word(a: u8, b: u8) -> bool {
23 (is_word_byte(a) && is_word_byte(b)) || a & 0x80 != 0
25}
26
27fn tokenize(data: &[u8]) -> Vec<Vec<u8>> {
28 let mut output = vec![];
31 let mut current = vec![];
32 let mut maybe_prev: Option<u8> = None;
33 for b in data {
34 let b = *b;
35 match maybe_prev {
36 None => current.push(b),
37 Some(prev) => {
38 if is_same_word(prev, b) {
39 current.push(b);
40 } else {
41 output.push(current);
42 current = vec![b];
43 }
44 }
45 }
46 maybe_prev = Some(b);
47 }
48 if !current.is_empty() {
49 output.push(current);
50 }
51 output
52}
53
54#[derive(PartialEq, Eq, Clone, Debug)]
55pub enum DiffHunk {
56 Unmodified(Vec<u8>),
57 Added(Vec<u8>),
58 Removed(Vec<u8>),
59}
60
61#[derive(PartialEq, Eq, Clone, Debug)]
62pub struct DiffLine {
63 pub left_line_number: u32,
64 pub right_line_number: u32,
65 pub has_left_content: bool,
66 pub has_right_content: bool,
67 pub hunks: Vec<DiffHunk>,
68}
69
70impl DiffLine {
71 fn reset_line(&mut self) {
72 self.has_left_content = false;
73 self.has_right_content = false;
74 self.hunks.clear();
75 }
76
77 pub fn is_unmodified(&self) -> bool {
78 self.hunks
79 .iter()
80 .all(|hunk| matches!(hunk, DiffHunk::Unmodified(_)))
81 }
82}
83
84pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
85 let left_tokens = tokenize(left);
88 let right_tokens = tokenize(right);
89 let result = diff_slice(&left_tokens, &right_tokens);
90 let mut diff_line = DiffLine {
91 left_line_number: 1,
92 right_line_number: 1,
93 has_left_content: false,
94 has_right_content: false,
95 hunks: vec![],
96 };
97 for hunk in result {
98 match hunk {
99 diff::Result::Both(left, right) => {
100 assert!(left == right);
101 diff_line.has_left_content = true;
102 diff_line.has_right_content = true;
103 diff_line.hunks.push(DiffHunk::Unmodified(left.clone()));
104 if left == &[b'\n'] {
105 callback(&diff_line);
106 diff_line.left_line_number += 1;
107 diff_line.right_line_number += 1;
108 diff_line.reset_line();
109 }
110 }
111 diff::Result::Left(left) => {
112 diff_line.has_left_content = true;
113 diff_line.hunks.push(DiffHunk::Removed(left.clone()));
114 if left == &[b'\n'] {
115 callback(&diff_line);
116 diff_line.left_line_number += 1;
117 diff_line.reset_line();
118 }
119 }
120 diff::Result::Right(right) => {
121 diff_line.has_right_content = true;
122 diff_line.hunks.push(DiffHunk::Added(right.clone()));
123 if right == &[b'\n'] {
124 callback(&diff_line);
125 diff_line.right_line_number += 1;
126 diff_line.reset_line();
127 }
128 }
129 }
130 }
131 if !diff_line.hunks.is_empty() {
132 callback(&diff_line);
133 }
134}
135
136#[derive(PartialEq, Eq, Clone)]
137pub enum MergeHunk {
138 Resolved(Vec<u8>),
139 Conflict {
140 base: Vec<u8>,
141 left: Vec<u8>,
142 right: Vec<u8>,
143 },
144}
145
146impl Debug for MergeHunk {
147 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
148 match self {
149 MergeHunk::Resolved(data) => f
150 .debug_tuple("Resolved")
151 .field(&String::from_utf8_lossy(data))
152 .finish(),
153 MergeHunk::Conflict { base, left, right } => f
154 .debug_struct("Conflict")
155 .field("base", &String::from_utf8_lossy(base))
156 .field("left", &String::from_utf8_lossy(left))
157 .field("right", &String::from_utf8_lossy(right))
158 .finish(),
159 }
160 }
161}
162
163#[derive(Debug, PartialEq, Eq, Clone)]
164pub enum MergeResult {
165 Resolved(Vec<u8>),
166 Conflict(Vec<MergeHunk>),
167}
168
169pub fn merge(base: &[u8], left: &[u8], right: &[u8]) -> MergeResult {
171 let base_tokens = tokenize(base);
172 let left_tokens = tokenize(left);
173 let right_tokens = tokenize(right);
174
175 let left_diff = diff_slice(&base_tokens, &left_tokens);
176 let right_diff = diff_slice(&base_tokens, &right_tokens);
177
178 let mut hunk: Vec<u8> = vec![];
179 let mut hunks: Vec<MergeHunk> = vec![];
180 let mut left_it = left_diff.iter();
181 let mut right_it = right_diff.iter();
182
183 let mut left_hunk = left_it.next();
184 let mut right_hunk = right_it.next();
185 loop {
186 match (left_hunk, right_hunk) {
187 (None, None) => {
188 break;
189 }
190 (Some(diff::Result::Both(left_data_before, left_data_after)), _)
191 if left_data_before == left_data_after =>
192 {
193 match right_hunk.unwrap() {
195 diff::Result::Both(right_data_before, right_data_after) => {
196 assert_eq!(left_data_before, right_data_before);
198 hunk.append(&mut right_data_after.to_vec());
199 left_hunk = left_it.next();
200 right_hunk = right_it.next();
201 }
202 diff::Result::Left(right_data_before) => {
203 assert_eq!(left_data_before, right_data_before);
205 left_hunk = left_it.next();
206 right_hunk = right_it.next();
207 }
208 diff::Result::Right(right_data_after) => {
209 hunk.append(&mut right_data_after.to_vec());
211 right_hunk = right_it.next();
212 }
213 }
214 }
215 (_, Some(diff::Result::Both(right_data_before, right_data_after)))
216 if right_data_before == right_data_after =>
217 {
218 match left_hunk.unwrap() {
220 diff::Result::Both(left_data_before, left_data_after) => {
221 assert_eq!(left_data_before, right_data_before);
223 hunk.append(&mut left_data_after.to_vec());
224 left_hunk = left_it.next();
225 right_hunk = right_it.next();
226 }
227 diff::Result::Left(left_data_before) => {
228 assert_eq!(left_data_before, right_data_before);
230 left_hunk = left_it.next();
231 right_hunk = right_it.next();
232 }
233 diff::Result::Right(left_data_after) => {
234 hunk.append(&mut left_data_after.to_vec());
236 left_hunk = left_it.next();
237 }
238 }
239 }
240 (
241 Some(diff::Result::Left(left_data_before)),
242 Some(diff::Result::Left(right_data_before)),
243 ) => {
244 assert_eq!(left_data_before, right_data_before);
246 left_hunk = left_it.next();
247 right_hunk = right_it.next();
248 }
249 (
250 Some(diff::Result::Right(left_data_after)),
251 Some(diff::Result::Right(right_data_after)),
252 ) => {
253 if left_data_after == right_data_after {
254 hunk.append(&mut left_data_after.to_vec());
256 } else {
257 if !hunk.is_empty() {
259 hunks.push(MergeHunk::Resolved(hunk));
260 }
261 hunks.push(MergeHunk::Conflict {
262 base: vec![],
263 left: left_data_after.to_vec(),
264 right: right_data_after.to_vec(),
265 });
266 hunk = vec![];
267 }
268 left_hunk = left_it.next();
269 right_hunk = right_it.next();
270 }
271 (Some(diff::Result::Right(left_data_after)), None) => {
272 hunk.append(&mut left_data_after.to_vec());
274 left_hunk = left_it.next();
275 }
276 (None, Some(diff::Result::Right(right_data_after))) => {
277 hunk.append(&mut right_data_after.to_vec());
279 right_hunk = right_it.next();
280 }
281 _ => {
282 panic!("unhandled merge case: {:?}, {:?}", left_hunk, right_hunk);
283 }
284 }
285 }
286 if hunks.is_empty() {
287 MergeResult::Resolved(hunk)
288 } else {
289 if !hunk.is_empty() {
290 hunks.push(MergeHunk::Resolved(hunk));
291 }
292 MergeResult::Conflict(hunks)
293 }
294}
295
296#[cfg(test)]
297mod tests {
298 use super::*;
299
300 #[test]
301 fn test_merge() {
302 assert_eq!(merge(b"", b"", b""), MergeResult::Resolved(b"".to_vec()));
303 assert_eq!(
304 merge(b"a", b"a", b"a"),
305 MergeResult::Resolved(b"a".to_vec())
306 );
307 assert_eq!(merge(b"a", b"", b"a"), MergeResult::Resolved(b"".to_vec()));
308 assert_eq!(merge(b"a", b"a", b""), MergeResult::Resolved(b"".to_vec()));
309 assert_eq!(merge(b"a", b"", b""), MergeResult::Resolved(b"".to_vec()));
310 assert_eq!(
311 merge(b"a", b"a b", b"a"),
312 MergeResult::Resolved(b"a b".to_vec())
313 );
314 assert_eq!(
315 merge(b"a", b"a", b"a b"),
316 MergeResult::Resolved(b"a b".to_vec())
317 );
318 assert_eq!(
319 merge(b"a", b"a b", b"a c"),
320 MergeResult::Conflict(vec![
321 MergeHunk::Resolved(b"a ".to_vec()),
322 MergeHunk::Conflict {
323 base: b"".to_vec(),
324 left: b"b".to_vec(),
325 right: b"c".to_vec()
326 }
327 ])
328 );
329 assert_eq!(
330 merge(b"a", b"b", b"a"),
331 MergeResult::Resolved(b"b".to_vec())
332 );
333 assert_eq!(
334 merge(b"a", b"a", b"b"),
335 MergeResult::Resolved(b"b".to_vec())
336 );
337 assert_eq!(merge(b"a", b"", b"b"), MergeResult::Resolved(b"b".to_vec()));
341 assert_eq!(merge(b"a", b"b", b""), MergeResult::Resolved(b"b".to_vec()));
342 assert_eq!(
343 merge(b"a", b"b", b"c"),
344 MergeResult::Conflict(vec![MergeHunk::Conflict {
345 base: b"".to_vec(),
346 left: b"b".to_vec(),
347 right: b"c".to_vec()
348 }])
349 );
350 }
351}