1use hashbrown::HashMap;
4use serde::Serialize;
5use std::cmp::min;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10 Equal(&'a str),
11 Delete(&'a str),
12 Insert(&'a str),
13}
14
15pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
17 if old.is_empty() && new.is_empty() {
18 return Vec::with_capacity(0);
19 }
20 if old.is_empty() {
21 return vec![Chunk::Insert(new)];
22 }
23 if new.is_empty() {
24 return vec![Chunk::Delete(old)];
25 }
26
27 let mut prefix_len = 0;
29 for (o, n) in old.chars().zip(new.chars()) {
30 if o == n {
31 prefix_len += o.len_utf8();
32 } else {
33 break;
34 }
35 }
36
37 let mut suffix_len = 0;
38 let old_rest = &old[prefix_len..];
39 let new_rest = &new[prefix_len..];
40
41 let old_chars: Vec<char> = old_rest.chars().collect();
42 let new_chars: Vec<char> = new_rest.chars().collect();
43
44 for (o, n) in old_chars.iter().rev().zip(new_chars.iter().rev()) {
45 if o == n {
46 suffix_len += o.len_utf8();
47 } else {
48 break;
49 }
50 }
51
52 let old_middle_end = old_rest.len() - suffix_len;
53 let new_middle_end = new_rest.len() - suffix_len;
54
55 let old_middle = &old_rest[..old_middle_end];
56 let new_middle = &new_rest[..new_middle_end];
57
58 let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
59
60 if prefix_len > 0 {
62 result.push(Chunk::Equal(&old[..prefix_len]));
63 }
64
65 if !old_middle.is_empty() || !new_middle.is_empty() {
67 let old_chars: Vec<char> = old_middle.chars().collect();
68 let new_chars: Vec<char> = new_middle.chars().collect();
69 let old_byte_starts: Vec<usize> = old_middle.char_indices().map(|(idx, _)| idx).collect();
70 let new_byte_starts: Vec<usize> = new_middle.char_indices().map(|(idx, _)| idx).collect();
71 let edits = myers_diff(&old_chars, &new_chars);
72
73 let mut old_pos = 0;
74 let mut new_pos = 0;
75
76 for edit in edits {
77 match edit {
78 Edit::Equal => {
79 old_pos += 1;
80 new_pos += 1;
81 }
82 Edit::Delete => {
83 let Some(ch) = old_chars.get(old_pos).copied() else {
84 break;
85 };
86 let Some(byte_start) = old_byte_starts.get(old_pos).copied() else {
87 break;
88 };
89 let byte_end = byte_start + ch.len_utf8();
90 result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
91 old_pos += 1;
92 }
93 Edit::Insert => {
94 let Some(ch) = new_chars.get(new_pos).copied() else {
95 break;
96 };
97 let Some(byte_start) = new_byte_starts.get(new_pos).copied() else {
98 break;
99 };
100 let byte_end = byte_start + ch.len_utf8();
101 result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
102 new_pos += 1;
103 }
104 }
105 }
106 }
107
108 if suffix_len > 0 {
110 result.push(Chunk::Equal(&old[old.len() - suffix_len..]));
111 }
112
113 result
114}
115
116#[derive(Debug, Clone, Copy, PartialEq, Eq)]
117enum Edit {
118 Equal,
119 Delete,
120 Insert,
121}
122
123#[allow(clippy::cast_sign_loss)]
124fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
125 let n = old.len();
126 let m = new.len();
127
128 if n == 0 {
129 return vec![Edit::Insert; m];
130 }
131 if m == 0 {
132 return vec![Edit::Delete; n];
133 }
134
135 let max_d = n + m;
136 let max_d_i32 = max_d as i32;
137 let mut v = vec![0; 2 * max_d + 1];
138 let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
139 let row_len = 2 * max_d + 1;
140
141 v[max_d] = 0;
142
143 for d in 0..=max_d {
144 let d_i32 = d as i32;
145 let row_start = d * row_len;
146 for k in (-d_i32..=d_i32).step_by(2) {
147 let k_idx = (k + max_d_i32) as usize;
148
149 let x = if k == -d_i32 || (k != d_i32 && v[k_idx - 1] < v[k_idx + 1]) {
150 v[k_idx + 1]
151 } else {
152 v[k_idx - 1] + 1
153 };
154
155 let mut x = x;
156 let mut y = (x as i32 - k) as usize;
157
158 while x < n && y < m && old[x] == new[y] {
159 x += 1;
160 y += 1;
161 }
162
163 v[k_idx] = x;
164 v_index[row_start + k_idx] = x;
165
166 if x >= n && y >= m {
167 return backtrack_myers(old, new, &v_index, d, k, max_d);
168 }
169 }
170 }
171
172 vec![]
173}
174
175#[allow(clippy::cast_sign_loss)]
176fn backtrack_myers(
177 old: &[char],
178 new: &[char],
179 v_index: &[usize],
180 d: usize,
181 mut k: i32,
182 max_d: usize,
183) -> Vec<Edit> {
184 let mut edits = Vec::with_capacity(old.len() + new.len());
185 let mut x = old.len();
186 let mut y = new.len();
187 let max_d_i32 = max_d as i32;
188 let row_len = 2 * max_d + 1;
189
190 for cur_d in (0..=d).rev() {
191 if cur_d == 0 {
192 while x > 0 && y > 0 {
193 edits.push(Edit::Equal);
194 x -= 1;
195 y -= 1;
196 }
197 break;
198 }
199
200 let k_idx = (k + max_d_i32) as usize;
201 let prev_row_start = (cur_d - 1) * row_len;
202
203 let prev_k = if k == -(cur_d as i32)
204 || (k != cur_d as i32
205 && v_index[prev_row_start + k_idx - 1] < v_index[prev_row_start + k_idx + 1])
206 {
207 k + 1
208 } else {
209 k - 1
210 };
211
212 let prev_k_idx = (prev_k + max_d_i32) as usize;
213 let prev_x_val = v_index[prev_row_start + prev_k_idx];
214 let prev_y = (prev_x_val as i32 - prev_k) as usize;
215
216 let (move_x, move_y) = if prev_k == k + 1 {
217 (prev_x_val, prev_y + 1)
218 } else {
219 (prev_x_val + 1, prev_y)
220 };
221
222 while x > move_x && y > move_y {
223 edits.push(Edit::Equal);
224 x -= 1;
225 y -= 1;
226 }
227
228 if prev_k == k + 1 {
229 edits.push(Edit::Insert);
230 y -= 1;
231 } else {
232 edits.push(Edit::Delete);
233 x -= 1;
234 }
235
236 k = prev_k;
237 }
238
239 edits.reverse();
240 edits
241}
242
243#[derive(Debug, Clone)]
245pub struct DiffOptions<'a> {
246 pub context_lines: usize,
247 pub old_label: Option<&'a str>,
248 pub new_label: Option<&'a str>,
249 pub missing_newline_hint: bool,
250}
251
252impl Default for DiffOptions<'_> {
253 fn default() -> Self {
254 Self {
255 context_lines: 3,
256 old_label: None,
257 new_label: None,
258 missing_newline_hint: true,
259 }
260 }
261}
262
263#[derive(Debug, Clone, Serialize)]
265pub struct DiffBundle {
266 pub hunks: Vec<DiffHunk>,
267 pub formatted: String,
268 pub is_empty: bool,
269}
270
271#[derive(Debug, Clone, Serialize)]
273pub struct DiffHunk {
274 pub old_start: usize,
275 pub old_lines: usize,
276 pub new_start: usize,
277 pub new_lines: usize,
278 pub lines: Vec<DiffLine>,
279}
280
281#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
283#[serde(rename_all = "snake_case")]
284pub enum DiffLineKind {
285 Context,
286 Addition,
287 Deletion,
288}
289
290#[derive(Debug, Clone, Serialize)]
292pub struct DiffLine {
293 pub kind: DiffLineKind,
294 pub old_line: Option<usize>,
295 pub new_line: Option<usize>,
296 pub text: String,
297}
298
299pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
301where
302 F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
303{
304 let old_lines_owned = split_lines_with_terminator(old);
305 let new_lines_owned = split_lines_with_terminator(new);
306
307 let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
308 let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
309
310 let records = collect_line_records(&old_refs, &new_refs);
311 let has_changes = records
312 .iter()
313 .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
314
315 let hunks = if has_changes {
316 build_hunks(&records, options.context_lines)
317 } else {
318 Vec::new()
319 };
320
321 let formatted = if hunks.is_empty() {
322 String::new()
323 } else {
324 formatter(&hunks, &options)
325 };
326
327 DiffBundle {
328 hunks,
329 formatted,
330 is_empty: !has_changes,
331 }
332}
333
334fn split_lines_with_terminator(text: &str) -> Vec<String> {
335 if text.is_empty() {
336 return Vec::with_capacity(0);
337 }
338
339 let mut lines: Vec<String> = text
340 .split_inclusive('\n')
341 .map(|line| line.to_string())
342 .collect();
343
344 if lines.is_empty() {
345 lines.push(text.to_string());
346 }
347
348 lines
349}
350
351fn collect_line_records<'a>(
352 old_lines: &'a [&'a str],
353 new_lines: &'a [&'a str],
354) -> Vec<LineRecord<'a>> {
355 let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
356 let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
357 let mut old_index = 0usize;
358 let mut new_index = 0usize;
359
360 for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
361 match chunk {
362 Chunk::Equal(text) => {
363 for _ in text.chars() {
364 let old_line = old_index + 1;
365 let new_line = new_index + 1;
366 let line = old_lines[old_index];
367 records.push(LineRecord {
368 kind: DiffLineKind::Context,
369 old_line: Some(old_line),
370 new_line: Some(new_line),
371 text: line,
372 anchor_old: old_line,
373 anchor_new: new_line,
374 });
375 old_index += 1;
376 new_index += 1;
377 }
378 }
379 Chunk::Delete(text) => {
380 for _ in text.chars() {
381 let old_line = old_index + 1;
382 let anchor_new = new_index + 1;
383 let line = old_lines[old_index];
384 records.push(LineRecord {
385 kind: DiffLineKind::Deletion,
386 old_line: Some(old_line),
387 new_line: None,
388 text: line,
389 anchor_old: old_line,
390 anchor_new,
391 });
392 old_index += 1;
393 }
394 }
395 Chunk::Insert(text) => {
396 for _ in text.chars() {
397 let new_line = new_index + 1;
398 let anchor_old = old_index + 1;
399 let line = new_lines[new_index];
400 records.push(LineRecord {
401 kind: DiffLineKind::Addition,
402 old_line: None,
403 new_line: Some(new_line),
404 text: line,
405 anchor_old,
406 anchor_new: new_line,
407 });
408 new_index += 1;
409 }
410 }
411 }
412 }
413
414 records
415}
416
417fn encode_line_sequences<'a>(
418 old_lines: &'a [&'a str],
419 new_lines: &'a [&'a str],
420) -> (String, String) {
421 let mut token_map: HashMap<&'a str, char> = HashMap::new();
422 let mut next_codepoint: u32 = 0;
423
424 let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
425 let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
426
427 (old_encoded, new_encoded)
428}
429
430fn encode_line_list<'a>(
431 lines: &'a [&'a str],
432 map: &mut HashMap<&'a str, char>,
433 next_codepoint: &mut u32,
434) -> String {
435 let mut encoded = String::with_capacity(lines.len());
436 for &line in lines {
437 let token = if let Some(&value) = map.get(line) {
438 value
439 } else {
440 let Some(ch) = next_token_char(next_codepoint) else {
441 break;
442 };
443 map.insert(line, ch);
444 ch
445 };
446 encoded.push(token);
447 }
448 encoded
449}
450
451fn next_token_char(counter: &mut u32) -> Option<char> {
452 while *counter <= 0x10FFFF {
453 let candidate = *counter;
454 *counter += 1;
455 if (0xD800..=0xDFFF).contains(&candidate) {
456 continue;
457 }
458 if let Some(ch) = char::from_u32(candidate) {
459 return Some(ch);
460 }
461 }
462 None
463}
464
465#[derive(Debug)]
466struct LineRecord<'a> {
467 kind: DiffLineKind,
468 old_line: Option<usize>,
469 new_line: Option<usize>,
470 text: &'a str,
471 anchor_old: usize,
472 anchor_new: usize,
473}
474
475fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
476 if records.is_empty() {
477 return Vec::new();
478 }
479
480 let ranges = compute_hunk_ranges(records, context);
481 let mut hunks = Vec::with_capacity(ranges.len());
482
483 for (start, end) in ranges {
484 let slice = &records[start..=end];
485
486 let old_start = slice
487 .iter()
488 .filter_map(|r| r.old_line)
489 .min()
490 .or_else(|| slice.iter().map(|r| r.anchor_old).min())
491 .unwrap_or(1);
492
493 let new_start = slice
494 .iter()
495 .filter_map(|r| r.new_line)
496 .min()
497 .or_else(|| slice.iter().map(|r| r.anchor_new).min())
498 .unwrap_or(1);
499
500 let old_lines = slice
501 .iter()
502 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
503 .count();
504 let new_lines = slice
505 .iter()
506 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
507 .count();
508
509 let lines = slice
510 .iter()
511 .map(|record| DiffLine {
512 kind: record.kind.clone(),
513 old_line: record.old_line,
514 new_line: record.new_line,
515 text: record.text.to_string(),
516 })
517 .collect();
518
519 hunks.push(DiffHunk {
520 old_start,
521 old_lines,
522 new_start,
523 new_lines,
524 lines,
525 });
526 }
527
528 hunks
529}
530
531fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
532 let mut ranges = Vec::with_capacity(4);
533 let mut current_start: Option<usize> = None;
534 let mut current_end: usize = 0;
535
536 for (idx, record) in records.iter().enumerate() {
537 if record.kind != DiffLineKind::Context {
538 let start = idx.saturating_sub(context);
539 let end = min(idx + context, records.len().saturating_sub(1));
540
541 if let Some(existing_start) = current_start {
542 if start < existing_start {
543 current_start = Some(start);
544 }
545 if end > current_end {
546 current_end = end;
547 }
548 } else {
549 current_start = Some(start);
550 current_end = end;
551 }
552 } else if let Some(start) = current_start
553 && idx > current_end
554 {
555 ranges.push((start, current_end));
556 current_start = None;
557 }
558 }
559
560 if let Some(start) = current_start {
561 ranges.push((start, current_end));
562 }
563
564 ranges
565}