1use serde::Serialize;
4use std::cmp::min;
5use std::collections::HashMap;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10 Equal(&'a str),
11 Delete(&'a str),
12 Insert(&'a str),
13}
14
15pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
17 if old.is_empty() && new.is_empty() {
18 return Vec::with_capacity(0);
19 }
20 if old.is_empty() {
21 return vec![Chunk::Insert(new)];
22 }
23 if new.is_empty() {
24 return vec![Chunk::Delete(old)];
25 }
26
27 let mut prefix_len = 0;
29 for (o, n) in old.chars().zip(new.chars()) {
30 if o == n {
31 prefix_len += o.len_utf8();
32 } else {
33 break;
34 }
35 }
36
37 let mut suffix_len = 0;
38 let old_rest = &old[prefix_len..];
39 let new_rest = &new[prefix_len..];
40
41 let old_chars: Vec<char> = old_rest.chars().collect();
42 let new_chars: Vec<char> = new_rest.chars().collect();
43
44 for (o, n) in old_chars.iter().rev().zip(new_chars.iter().rev()) {
45 if o == n {
46 suffix_len += o.len_utf8();
47 } else {
48 break;
49 }
50 }
51
52 let old_middle_end = old_rest.len() - suffix_len;
53 let new_middle_end = new_rest.len() - suffix_len;
54
55 let old_middle = &old_rest[..old_middle_end];
56 let new_middle = &new_rest[..new_middle_end];
57
58 let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
59
60 if prefix_len > 0 {
62 result.push(Chunk::Equal(&old[..prefix_len]));
63 }
64
65 if !old_middle.is_empty() || !new_middle.is_empty() {
67 let old_chars: Vec<char> = old_middle.chars().collect();
68 let new_chars: Vec<char> = new_middle.chars().collect();
69 let edits = myers_diff(&old_chars, &new_chars);
70
71 let mut old_pos = 0;
72 let mut new_pos = 0;
73
74 for edit in edits {
75 match edit {
76 Edit::Equal => {
77 old_pos += 1;
78 new_pos += 1;
79 }
80 Edit::Delete => {
81 let ch = old_chars[old_pos];
82 let byte_start = old_middle.char_indices().nth(old_pos).unwrap().0;
83 let byte_end = byte_start + ch.len_utf8();
84 result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
85 old_pos += 1;
86 }
87 Edit::Insert => {
88 let ch = new_chars[new_pos];
89 let byte_start = new_middle.char_indices().nth(new_pos).unwrap().0;
90 let byte_end = byte_start + ch.len_utf8();
91 result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
92 new_pos += 1;
93 }
94 }
95 }
96 }
97
98 if suffix_len > 0 {
100 result.push(Chunk::Equal(&old[old.len() - suffix_len..]));
101 }
102
103 result
104}
105
106#[derive(Debug, Clone, Copy, PartialEq, Eq)]
107enum Edit {
108 Equal,
109 Delete,
110 Insert,
111}
112
113fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
114 let n = old.len();
115 let m = new.len();
116
117 if n == 0 {
118 return vec![Edit::Insert; m];
119 }
120 if m == 0 {
121 return vec![Edit::Delete; n];
122 }
123
124 let max_d = n + m;
125 let mut v = vec![0; 2 * max_d + 1];
126 let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
127 let row_len = 2 * max_d + 1;
128
129 v[max_d] = 0;
130
131 for d in 0..=max_d {
132 for k in (-(d as i32)..=(d as i32)).step_by(2) {
133 let k_idx = (k + max_d as i32) as usize;
134
135 let x = if k == -(d as i32) || (k != d as i32 && v[k_idx - 1] < v[k_idx + 1]) {
136 v[k_idx + 1]
137 } else {
138 v[k_idx - 1] + 1
139 };
140
141 let mut x = x;
142 let mut y = (x as i32 - k) as usize;
143
144 while x < n && y < m && old[x] == new[y] {
145 x += 1;
146 y += 1;
147 }
148
149 v[k_idx] = x;
150 v_index[d * row_len + k_idx] = x;
151
152 if x >= n && y >= m {
153 return backtrack_myers(old, new, &v_index, d, k, max_d);
154 }
155 }
156 }
157
158 vec![]
159}
160
161fn backtrack_myers(
162 old: &[char],
163 new: &[char],
164 v_index: &[usize],
165 d: usize,
166 mut k: i32,
167 max_d: usize,
168) -> Vec<Edit> {
169 let mut edits = Vec::with_capacity(old.len() + new.len());
170 let mut x = old.len();
171 let mut y = new.len();
172 let row_len = 2 * max_d + 1;
173
174 for cur_d in (0..=d).rev() {
175 if cur_d == 0 {
176 while x > 0 && y > 0 {
177 edits.push(Edit::Equal);
178 x -= 1;
179 y -= 1;
180 }
181 break;
182 }
183
184 let k_idx = (k + max_d as i32) as usize;
185
186 let prev_k = if k == -(cur_d as i32)
187 || (k != cur_d as i32
188 && v_index[(cur_d - 1) * row_len + k_idx - 1]
189 < v_index[(cur_d - 1) * row_len + k_idx + 1])
190 {
191 k + 1
192 } else {
193 k - 1
194 };
195
196 let prev_k_idx = (prev_k + max_d as i32) as usize;
197 let prev_x_val = v_index[(cur_d - 1) * row_len + prev_k_idx];
198 let prev_y = (prev_x_val as i32 - prev_k) as usize;
199
200 let (move_x, move_y) = if prev_k == k + 1 {
201 (prev_x_val, prev_y + 1)
202 } else {
203 (prev_x_val + 1, prev_y)
204 };
205
206 while x > move_x && y > move_y {
207 edits.push(Edit::Equal);
208 x -= 1;
209 y -= 1;
210 }
211
212 if prev_k == k + 1 {
213 edits.push(Edit::Insert);
214 y -= 1;
215 } else {
216 edits.push(Edit::Delete);
217 x -= 1;
218 }
219
220 k = prev_k;
221 }
222
223 edits.reverse();
224 edits
225}
226
227#[derive(Debug, Clone)]
229pub struct DiffOptions<'a> {
230 pub context_lines: usize,
231 pub old_label: Option<&'a str>,
232 pub new_label: Option<&'a str>,
233 pub missing_newline_hint: bool,
234}
235
236impl Default for DiffOptions<'_> {
237 fn default() -> Self {
238 Self {
239 context_lines: 3,
240 old_label: None,
241 new_label: None,
242 missing_newline_hint: true,
243 }
244 }
245}
246
247#[derive(Debug, Clone, Serialize)]
249pub struct DiffBundle {
250 pub hunks: Vec<DiffHunk>,
251 pub formatted: String,
252 pub is_empty: bool,
253}
254
255#[derive(Debug, Clone, Serialize)]
257pub struct DiffHunk {
258 pub old_start: usize,
259 pub old_lines: usize,
260 pub new_start: usize,
261 pub new_lines: usize,
262 pub lines: Vec<DiffLine>,
263}
264
265#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
267#[serde(rename_all = "snake_case")]
268pub enum DiffLineKind {
269 Context,
270 Addition,
271 Deletion,
272}
273
274#[derive(Debug, Clone, Serialize)]
276pub struct DiffLine {
277 pub kind: DiffLineKind,
278 pub old_line: Option<usize>,
279 pub new_line: Option<usize>,
280 pub text: String,
281}
282
283pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
285where
286 F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
287{
288 let old_lines_owned = split_lines_with_terminator(old);
289 let new_lines_owned = split_lines_with_terminator(new);
290
291 let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
292 let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
293
294 let records = collect_line_records(&old_refs, &new_refs);
295 let has_changes = records
296 .iter()
297 .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
298
299 let hunks = if has_changes {
300 build_hunks(&records, options.context_lines)
301 } else {
302 Vec::new()
303 };
304
305 let formatted = if hunks.is_empty() {
306 String::new()
307 } else {
308 formatter(&hunks, &options)
309 };
310
311 DiffBundle {
312 hunks,
313 formatted,
314 is_empty: !has_changes,
315 }
316}
317
318fn split_lines_with_terminator(text: &str) -> Vec<String> {
319 if text.is_empty() {
320 return Vec::with_capacity(0);
321 }
322
323 let mut lines: Vec<String> = text
324 .split_inclusive('\n')
325 .map(|line| line.to_string())
326 .collect();
327
328 if lines.is_empty() {
329 lines.push(text.to_string());
330 }
331
332 lines
333}
334
335fn collect_line_records<'a>(
336 old_lines: &'a [&'a str],
337 new_lines: &'a [&'a str],
338) -> Vec<LineRecord<'a>> {
339 let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
340 let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
341 let mut old_index = 0usize;
342 let mut new_index = 0usize;
343
344 for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
345 match chunk {
346 Chunk::Equal(text) => {
347 for _ in text.chars() {
348 let old_line = old_index + 1;
349 let new_line = new_index + 1;
350 let line = old_lines[old_index];
351 records.push(LineRecord {
352 kind: DiffLineKind::Context,
353 old_line: Some(old_line),
354 new_line: Some(new_line),
355 text: line,
356 anchor_old: old_line,
357 anchor_new: new_line,
358 });
359 old_index += 1;
360 new_index += 1;
361 }
362 }
363 Chunk::Delete(text) => {
364 for _ in text.chars() {
365 let old_line = old_index + 1;
366 let anchor_new = new_index + 1;
367 let line = old_lines[old_index];
368 records.push(LineRecord {
369 kind: DiffLineKind::Deletion,
370 old_line: Some(old_line),
371 new_line: None,
372 text: line,
373 anchor_old: old_line,
374 anchor_new,
375 });
376 old_index += 1;
377 }
378 }
379 Chunk::Insert(text) => {
380 for _ in text.chars() {
381 let new_line = new_index + 1;
382 let anchor_old = old_index + 1;
383 let line = new_lines[new_index];
384 records.push(LineRecord {
385 kind: DiffLineKind::Addition,
386 old_line: None,
387 new_line: Some(new_line),
388 text: line,
389 anchor_old,
390 anchor_new: new_line,
391 });
392 new_index += 1;
393 }
394 }
395 }
396 }
397
398 records
399}
400
401fn encode_line_sequences<'a>(
402 old_lines: &'a [&'a str],
403 new_lines: &'a [&'a str],
404) -> (String, String) {
405 let mut token_map: HashMap<&'a str, char> = HashMap::new();
406 let mut next_codepoint: u32 = 0;
407
408 let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
409 let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
410
411 (old_encoded, new_encoded)
412}
413
414fn encode_line_list<'a>(
415 lines: &'a [&'a str],
416 map: &mut HashMap<&'a str, char>,
417 next_codepoint: &mut u32,
418) -> String {
419 let mut encoded = String::with_capacity(lines.len());
420 for &line in lines {
421 let token = if let Some(&value) = map.get(line) {
422 value
423 } else {
424 let ch = next_token_char(next_codepoint).expect("exceeded diff token capacity");
425 map.insert(line, ch);
426 ch
427 };
428 encoded.push(token);
429 }
430 encoded
431}
432
433fn next_token_char(counter: &mut u32) -> Option<char> {
434 while *counter <= 0x10FFFF {
435 let candidate = *counter;
436 *counter += 1;
437 if (0xD800..=0xDFFF).contains(&candidate) {
438 continue;
439 }
440 if let Some(ch) = char::from_u32(candidate) {
441 return Some(ch);
442 }
443 }
444 None
445}
446
447#[derive(Debug)]
448struct LineRecord<'a> {
449 kind: DiffLineKind,
450 old_line: Option<usize>,
451 new_line: Option<usize>,
452 text: &'a str,
453 anchor_old: usize,
454 anchor_new: usize,
455}
456
457fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
458 if records.is_empty() {
459 return Vec::new();
460 }
461
462 let ranges = compute_hunk_ranges(records, context);
463 let mut hunks = Vec::with_capacity(ranges.len());
464
465 for (start, end) in ranges {
466 let slice = &records[start..=end];
467
468 let old_start = slice
469 .iter()
470 .filter_map(|r| r.old_line)
471 .min()
472 .or_else(|| slice.iter().map(|r| r.anchor_old).min())
473 .unwrap_or(1);
474
475 let new_start = slice
476 .iter()
477 .filter_map(|r| r.new_line)
478 .min()
479 .or_else(|| slice.iter().map(|r| r.anchor_new).min())
480 .unwrap_or(1);
481
482 let old_lines = slice
483 .iter()
484 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
485 .count();
486 let new_lines = slice
487 .iter()
488 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
489 .count();
490
491 let lines = slice
492 .iter()
493 .map(|record| DiffLine {
494 kind: record.kind.clone(),
495 old_line: record.old_line,
496 new_line: record.new_line,
497 text: record.text.to_string(),
498 })
499 .collect();
500
501 hunks.push(DiffHunk {
502 old_start,
503 old_lines,
504 new_start,
505 new_lines,
506 lines,
507 });
508 }
509
510 hunks
511}
512
513fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
514 let mut ranges = Vec::with_capacity(4);
515 let mut current_start: Option<usize> = None;
516 let mut current_end: usize = 0;
517
518 for (idx, record) in records.iter().enumerate() {
519 if record.kind != DiffLineKind::Context {
520 let start = idx.saturating_sub(context);
521 let end = min(idx + context, records.len().saturating_sub(1));
522
523 if let Some(existing_start) = current_start {
524 if start < existing_start {
525 current_start = Some(start);
526 }
527 if end > current_end {
528 current_end = end;
529 }
530 } else {
531 current_start = Some(start);
532 current_end = end;
533 }
534 } else if let Some(start) = current_start
535 && idx > current_end
536 {
537 ranges.push((start, current_end));
538 current_start = None;
539 }
540 }
541
542 if let Some(start) = current_start {
543 ranges.push((start, current_end));
544 }
545
546 ranges
547}