1use imara_diff::{
2 intern::{InternedInput, Token},
3 sources::{byte_lines_with_terminator, lines_with_terminator},
4};
5
6use crate::{
7 patch::{Hunk, HunkRange, Line, Patch},
8 range::{DiffRange, SliceLike},
9 sink::DiffyDiffRangeBuilder,
10 Algorithm,
11};
12use std::{borrow::Cow, cmp, hash::Hash, ops};
13
14mod cleanup;
15mod myers;
16
17#[cfg(test)]
18mod tests;
19
20#[allow(dead_code)]
22#[derive(Debug, PartialEq, Eq)]
23enum Diff<'a, T: ?Sized> {
24 Equal(&'a T),
25 Delete(&'a T),
26 Insert(&'a T),
27}
28
29impl<T: ?Sized> Copy for Diff<'_, T> {}
30
31impl<T: ?Sized> Clone for Diff<'_, T> {
32 fn clone(&self) -> Self {
33 *self
34 }
35}
36
37impl<'a, T> From<DiffRange<'a, 'a, T>> for Diff<'a, T>
38where
39 T: ?Sized + SliceLike,
40{
41 fn from(diff: DiffRange<'a, 'a, T>) -> Self {
42 match diff {
43 DiffRange::Equal(range, _) => Diff::Equal(range.as_slice()),
44 DiffRange::Delete(range) => Diff::Delete(range.as_slice()),
45 DiffRange::Insert(range) => Diff::Insert(range.as_slice()),
46 }
47 }
48}
49
50#[derive(Debug)]
52pub struct DiffOptions {
53 compact: bool,
54 context_len: usize,
55 algorithm: Algorithm,
56 original_filename: Option<Cow<'static, str>>,
57 modified_filename: Option<Cow<'static, str>>,
58}
59
60impl DiffOptions {
61 pub fn new() -> Self {
67 Self {
68 compact: true,
69 context_len: 3,
70 algorithm: Algorithm::Histogram,
71 original_filename: Some("original".into()),
72 modified_filename: Some("modified".into()),
73 }
74 }
75
76 pub fn set_context_len(&mut self, context_len: usize) -> &mut Self {
78 self.context_len = context_len;
79 self
80 }
81
82 #[allow(dead_code)]
87 fn set_compact(&mut self, compact: bool) -> &mut Self {
88 self.compact = compact;
89 self
90 }
91
92 pub fn set_algorithm(&mut self, algorithm: Algorithm) -> &mut Self {
94 self.algorithm = algorithm;
95 self
96 }
97
98 pub fn set_original_filename<T>(&mut self, filename: T) -> &mut Self
102 where
103 T: Into<Cow<'static, str>>,
104 {
105 self.original_filename = Some(filename.into());
106 self
107 }
108
109 pub fn set_modified_filename<T>(&mut self, filename: T) -> &mut Self
113 where
114 T: Into<Cow<'static, str>>,
115 {
116 self.modified_filename = Some(filename.into());
117 self
118 }
119
120 #[allow(dead_code)]
122 fn diff<'a>(&self, original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
123 let solution = myers::diff(original.as_bytes(), modified.as_bytes());
124
125 let mut solution = solution
126 .into_iter()
127 .map(|diff_range| diff_range.to_str(original, modified))
128 .collect();
129
130 if self.compact {
131 cleanup::compact(&mut solution);
132 }
133
134 solution.into_iter().map(Diff::from).collect()
135 }
136
137 pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> {
139 let input = InternedInput::new(original, modified);
140 let old_lines: Vec<_> = lines_with_terminator(original).collect();
141 let new_lines: Vec<_> = lines_with_terminator(modified).collect();
142
143 let solution = self.diff_interned(&input);
144
145 let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
146 Patch::new(
147 self.original_filename.clone(),
148 self.modified_filename.clone(),
149 hunks,
150 )
151 }
152
153 pub fn create_patch_bytes<'a>(
155 &self,
156 original: &'a [u8],
157 modified: &'a [u8],
158 ) -> Patch<'a, [u8]> {
159 let input = InternedInput::new(original, modified);
160 let old_lines: Vec<_> = byte_lines_with_terminator(original).collect();
161 let new_lines: Vec<_> = byte_lines_with_terminator(modified).collect();
162
163 let solution = self.diff_interned(&input);
164
165 let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
166
167 fn cow_str_to_bytes(cow: Cow<'static, str>) -> Cow<'static, [u8]> {
169 match cow {
170 Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
171 Cow::Owned(o) => Cow::Owned(o.into_bytes()),
172 }
173 }
174
175 Patch::new(
176 self.original_filename.clone().map(cow_str_to_bytes),
177 self.modified_filename.clone().map(cow_str_to_bytes),
178 hunks,
179 )
180 }
181
182 pub(crate) fn diff_interned<'a, T: Eq + Hash>(
183 &self,
184 input: &'a InternedInput<T>,
185 ) -> Vec<DiffRange<'a, 'a, [Token]>> {
186 let sink = DiffyDiffRangeBuilder::new(input);
187
188 imara_diff::diff(self.algorithm, input, sink)
189 }
190
191 pub(crate) fn diff_tokens<'a>(
192 &self,
193 before: &'a [Token],
194 after: &'a [Token],
195 num_tokens: u32,
196 ) -> Vec<DiffRange<'a, 'a, [Token]>> {
197 let sink = DiffyDiffRangeBuilder::from_tokens(before, after);
198
199 imara_diff::diff_with_tokens(self.algorithm, before, after, num_tokens, sink)
200 }
201}
202
203impl Default for DiffOptions {
204 fn default() -> Self {
205 Self::new()
206 }
207}
208
209#[allow(dead_code)]
211fn diff<'a>(original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
212 DiffOptions::default().diff(original, modified)
213}
214
215pub fn create_patch<'a>(original: &'a str, modified: &'a str) -> Patch<'a, str> {
249 DiffOptions::default().create_patch(original, modified)
250}
251
252pub fn create_patch_bytes<'a>(original: &'a [u8], modified: &'a [u8]) -> Patch<'a, [u8]> {
254 DiffOptions::default().create_patch_bytes(original, modified)
255}
256
257fn to_hunks<'a, T: ?Sized>(
258 lines1: &[&'a T],
259 lines2: &[&'a T],
260 solution: &[DiffRange<[Token]>],
261 context_len: usize,
262) -> Vec<Hunk<'a, T>> {
263 let edit_script = build_edit_script(solution);
264
265 let mut hunks = Vec::new();
266
267 let mut idx = 0;
268 while let Some(mut script) = edit_script.get(idx) {
269 let start1 = script.old.start.saturating_sub(context_len);
270 let start2 = script.new.start.saturating_sub(context_len);
271
272 let (mut end1, mut end2) = calc_end(
273 context_len,
274 lines1.len(),
275 lines2.len(),
276 script.old.end,
277 script.new.end,
278 );
279
280 let mut lines = Vec::new();
281
282 for line in lines2.get(start2..script.new.start).into_iter().flatten() {
284 lines.push(Line::Context(*line));
285 }
286
287 loop {
288 for line in lines1.get(script.old.clone()).into_iter().flatten() {
290 lines.push(Line::Delete(*line));
291 }
292
293 for line in lines2.get(script.new.clone()).into_iter().flatten() {
295 lines.push(Line::Insert(*line));
296 }
297
298 if let Some(s) = edit_script.get(idx + 1) {
299 let start1_next =
301 cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len);
302 if start1_next < end1 {
303 for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start)
305 {
306 if let Some(line) = lines2.get(i2) {
307 lines.push(Line::Context(*line));
308 }
309 }
310
311 let (e1, e2) = calc_end(
313 context_len,
314 lines1.len(),
315 lines2.len(),
316 s.old.end,
317 s.new.end,
318 );
319
320 end1 = e1;
321 end2 = e2;
322 script = s;
323 idx += 1;
324 continue;
325 }
326 }
327
328 break;
329 }
330
331 for line in lines2.get(script.new.end..end2).into_iter().flatten() {
333 lines.push(Line::Context(*line));
334 }
335
336 let len1 = end1 - start1;
337 let old_range = HunkRange::new(if len1 > 0 { start1 + 1 } else { start1 }, len1);
338
339 let len2 = end2 - start2;
340 let new_range = HunkRange::new(if len2 > 0 { start2 + 1 } else { start2 }, len2);
341
342 hunks.push(Hunk::new(old_range, new_range, None, lines));
343 idx += 1;
344 }
345
346 hunks
347}
348
349fn calc_end(
350 context_len: usize,
351 text1_len: usize,
352 text2_len: usize,
353 script1_end: usize,
354 script2_end: usize,
355) -> (usize, usize) {
356 let post_context_len = cmp::min(
357 context_len,
358 cmp::min(
359 text1_len.saturating_sub(script1_end),
360 text2_len.saturating_sub(script2_end),
361 ),
362 );
363
364 let end1 = script1_end + post_context_len;
365 let end2 = script2_end + post_context_len;
366
367 (end1, end2)
368}
369
370#[derive(Debug)]
371struct EditRange {
372 old: ops::Range<usize>,
373 new: ops::Range<usize>,
374}
375
376impl EditRange {
377 fn new(old: ops::Range<usize>, new: ops::Range<usize>) -> Self {
378 Self { old, new }
379 }
380}
381
382fn build_edit_script<T>(solution: &[DiffRange<[T]>]) -> Vec<EditRange> {
383 let mut idx_a = 0;
384 let mut idx_b = 0;
385
386 let mut edit_script: Vec<EditRange> = Vec::new();
387 let mut script = None;
388
389 for diff in solution {
390 match diff {
391 DiffRange::Equal(range1, range2) => {
392 idx_a += range1.len();
393 idx_b += range2.len();
394 if let Some(script) = script.take() {
395 edit_script.push(script);
396 }
397 }
398 DiffRange::Delete(range) => {
399 match script {
400 Some(ref mut s) => s.old.end += range.len(),
401 None => {
402 script = Some(EditRange::new(idx_a..idx_a + range.len(), idx_b..idx_b));
403 }
404 }
405 idx_a += range.len();
406 }
407 DiffRange::Insert(range) => {
408 match script {
409 Some(ref mut s) => s.new.end += range.len(),
410 None => {
411 script = Some(EditRange::new(idx_a..idx_a, idx_b..idx_b + range.len()));
412 }
413 }
414 idx_b += range.len();
415 }
416 }
417 }
418
419 if let Some(script) = script.take() {
420 edit_script.push(script);
421 }
422
423 edit_script
424}
425
426#[cfg(test)]
427mod test {
428 use super::DiffOptions;
429
430 #[test]
431 fn set_original_and_modified_filenames() {
432 let original = "\
433I am afraid, however, that all I have known - that my story - will be forgotten.
434I am afraid for the world that is to come.
435Afraid that my plans will fail.
436Afraid of a doom worse than the Deepness.
437";
438 let modified = "\
439I am afraid, however, that all I have known - that my story - will be forgotten.
440I am afraid for the world that is to come.
441Afraid that Alendi will fail.
442Afraid of a doom brought by the Deepness.
443";
444 let expected = "\
445--- the old version
446+++ the better version
447@@ -1,4 +1,4 @@
448 I am afraid, however, that all I have known - that my story - will be forgotten.
449 I am afraid for the world that is to come.
450-Afraid that my plans will fail.
451-Afraid of a doom worse than the Deepness.
452+Afraid that Alendi will fail.
453+Afraid of a doom brought by the Deepness.
454";
455
456 let patch = DiffOptions::new()
457 .set_original_filename("the old version")
458 .set_modified_filename("the better version")
459 .create_patch(original, modified);
460
461 assert_eq!(patch.to_string(), expected);
462 }
463}