1use hashbrown::HashMap;
4use serde::Serialize;
5use std::cmp::min;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10 Equal(&'a str),
11 Delete(&'a str),
12 Insert(&'a str),
13}
14
15#[inline]
17pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
18 if old.is_empty() && new.is_empty() {
19 return Vec::with_capacity(0);
20 }
21 if old.is_empty() {
22 return vec![Chunk::Insert(new)];
23 }
24 if new.is_empty() {
25 return vec![Chunk::Delete(old)];
26 }
27
28 let prefix_byte_len: usize = old
30 .chars()
31 .zip(new.chars())
32 .take_while(|(o, n)| o == n)
33 .map(|(c, _)| c.len_utf8())
34 .sum();
35
36 let old_rest = &old[prefix_byte_len..];
38 let new_rest = &new[prefix_byte_len..];
39
40 let suffix_byte_len: usize = old_rest
41 .chars()
42 .rev()
43 .zip(new_rest.chars().rev())
44 .take_while(|(o, n)| o == n)
45 .map(|(c, _)| c.len_utf8())
46 .sum();
47
48 let old_middle_end = old_rest.len() - suffix_byte_len;
49 let new_middle_end = new_rest.len() - suffix_byte_len;
50
51 let old_middle = &old_rest[..old_middle_end];
52 let new_middle = &new_rest[..new_middle_end];
53
54 let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
55
56 if prefix_byte_len > 0 {
58 result.push(Chunk::Equal(&old[..prefix_byte_len]));
59 }
60
61 if !old_middle.is_empty() || !new_middle.is_empty() {
63 let old_chars: Vec<char> = old_middle.chars().collect();
64 let new_chars: Vec<char> = new_middle.chars().collect();
65 let old_byte_starts: Vec<usize> = old_middle.char_indices().map(|(idx, _)| idx).collect();
66 let new_byte_starts: Vec<usize> = new_middle.char_indices().map(|(idx, _)| idx).collect();
67 let edits = myers_diff(&old_chars, &new_chars);
68
69 let mut old_pos = 0;
70 let mut new_pos = 0;
71
72 for edit in edits {
73 match edit {
74 Edit::Equal => {
75 old_pos += 1;
76 new_pos += 1;
77 }
78 Edit::Delete => {
79 let Some(ch) = old_chars.get(old_pos).copied() else {
80 break;
81 };
82 let Some(byte_start) = old_byte_starts.get(old_pos).copied() else {
83 break;
84 };
85 let byte_end = byte_start + ch.len_utf8();
86 result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
87 old_pos += 1;
88 }
89 Edit::Insert => {
90 let Some(ch) = new_chars.get(new_pos).copied() else {
91 break;
92 };
93 let Some(byte_start) = new_byte_starts.get(new_pos).copied() else {
94 break;
95 };
96 let byte_end = byte_start + ch.len_utf8();
97 result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
98 new_pos += 1;
99 }
100 }
101 }
102 }
103
104 if suffix_byte_len > 0 {
106 result.push(Chunk::Equal(&old[old.len() - suffix_byte_len..]));
107 }
108
109 result
110}
111
112#[derive(Debug, Clone, Copy, PartialEq, Eq)]
113enum Edit {
114 Equal,
115 Delete,
116 Insert,
117}
118
119#[inline]
123fn advance_matching(old: &[char], new: &[char], mut x: usize, mut y: usize) -> (usize, usize) {
124 while x < old.len() && y < new.len() && old[x] == new[y] {
125 x += 1;
126 y += 1;
127 }
128 (x, y)
129}
130
131#[inline]
136fn backtrack_equal_run(
137 mut x: usize,
138 mut y: usize,
139 move_x: usize,
140 move_y: usize,
141 edits: &mut Vec<Edit>,
142) -> (usize, usize) {
143 while x > move_x && y > move_y {
144 edits.push(Edit::Equal);
145 x -= 1;
146 y -= 1;
147 }
148 (x, y)
149}
150
151#[allow(clippy::cast_sign_loss)]
152fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
153 let n = old.len();
154 let m = new.len();
155
156 if n == 0 {
157 return vec![Edit::Insert; m];
158 }
159 if m == 0 {
160 return vec![Edit::Delete; n];
161 }
162
163 let max_d = n.saturating_add(m).min(i32::MAX as usize);
164 let max_d_i32 = max_d as i32;
165 let mut v = vec![0; 2 * max_d + 1];
166 let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
167 let row_len = 2 * max_d + 1;
168
169 v[max_d] = 0;
170
171 for d in 0..=max_d {
172 let d_i32 = d as i32;
173 let row_start = d * row_len;
174 for k in (-d_i32..=d_i32).step_by(2) {
175 let k_idx = (k + max_d_i32) as usize;
176
177 let x = if k == -d_i32 || (k != d_i32 && v[k_idx - 1] < v[k_idx + 1]) {
178 v[k_idx + 1]
179 } else {
180 v[k_idx - 1] + 1
181 };
182
183 let mut x = x;
184 let mut y = (x as i32 - k) as usize;
185
186 (x, y) = advance_matching(old, new, x, y);
187
188 v[k_idx] = x;
189 v_index[row_start + k_idx] = x;
190
191 if x >= n && y >= m {
192 return backtrack_myers(old, new, &v_index, d, k, max_d);
193 }
194 }
195 }
196
197 vec![]
198}
199
200#[allow(clippy::cast_sign_loss)]
201fn backtrack_myers(
202 old: &[char],
203 new: &[char],
204 v_index: &[usize],
205 d: usize,
206 mut k: i32,
207 max_d: usize,
208) -> Vec<Edit> {
209 let mut edits = Vec::with_capacity(old.len() + new.len());
210 let mut x = old.len();
211 let mut y = new.len();
212 let max_d_i32 = max_d as i32;
213 let row_len = 2 * max_d + 1;
214
215 for cur_d in (0..=d).rev() {
216 if cur_d == 0 {
217 while x > 0 && y > 0 {
218 edits.push(Edit::Equal);
219 x -= 1;
220 y -= 1;
221 }
222 break;
223 }
224
225 let k_idx = (k + max_d_i32) as usize;
226 let prev_row_start = (cur_d - 1) * row_len;
227
228 let cur_d_i32 = cur_d as i32;
229 let prev_k = if k == cur_d_i32.wrapping_neg()
230 || (k != cur_d_i32
231 && v_index[prev_row_start + k_idx - 1] < v_index[prev_row_start + k_idx + 1])
232 {
233 k + 1
234 } else {
235 k - 1
236 };
237
238 let prev_k_idx = (prev_k + max_d_i32) as usize;
239 let prev_x_val = v_index[prev_row_start + prev_k_idx];
240 let prev_y = (prev_x_val as i32 - prev_k) as usize;
241
242 let (move_x, move_y) = if prev_k == k + 1 {
243 (prev_x_val, prev_y + 1)
244 } else {
245 (prev_x_val + 1, prev_y)
246 };
247
248 (x, y) = backtrack_equal_run(x, y, move_x, move_y, &mut edits);
249
250 if prev_k == k + 1 {
251 edits.push(Edit::Insert);
252 y -= 1;
253 } else {
254 edits.push(Edit::Delete);
255 x -= 1;
256 }
257
258 k = prev_k;
259 }
260
261 edits.reverse();
262 edits
263}
264
265#[derive(Debug, Clone)]
267pub struct DiffOptions<'a> {
268 pub context_lines: usize,
269 pub old_label: Option<&'a str>,
270 pub new_label: Option<&'a str>,
271 pub missing_newline_hint: bool,
272}
273
274impl Default for DiffOptions<'_> {
275 fn default() -> Self {
276 Self {
277 context_lines: 3,
278 old_label: None,
279 new_label: None,
280 missing_newline_hint: true,
281 }
282 }
283}
284
285#[derive(Debug, Clone, Serialize)]
287pub struct DiffBundle {
288 pub hunks: Vec<DiffHunk>,
289 pub formatted: String,
290 pub is_empty: bool,
291}
292
293#[derive(Debug, Clone, Serialize)]
295pub struct DiffHunk {
296 pub old_start: usize,
297 pub old_lines: usize,
298 pub new_start: usize,
299 pub new_lines: usize,
300 pub lines: Vec<DiffLine>,
301}
302
303#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
305#[serde(rename_all = "snake_case")]
306pub enum DiffLineKind {
307 Context,
308 Addition,
309 Deletion,
310}
311
312#[derive(Debug, Clone, Serialize)]
314pub struct DiffLine {
315 pub kind: DiffLineKind,
316 pub old_line: Option<u32>,
317 pub new_line: Option<u32>,
318 pub text: String,
319}
320
321pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
323where
324 F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
325{
326 let old_lines_owned = split_lines_with_terminator(old);
327 let new_lines_owned = split_lines_with_terminator(new);
328
329 let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
330 let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
331
332 let records = collect_line_records(&old_refs, &new_refs);
333 let has_changes = records
334 .iter()
335 .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
336
337 let hunks = if has_changes {
338 build_hunks(&records, options.context_lines)
339 } else {
340 Vec::new()
341 };
342
343 let formatted = if hunks.is_empty() {
344 String::new()
345 } else {
346 formatter(&hunks, &options)
347 };
348
349 DiffBundle {
350 hunks,
351 formatted,
352 is_empty: !has_changes,
353 }
354}
355
356fn split_lines_with_terminator(text: &str) -> Vec<String> {
357 if text.is_empty() {
358 return Vec::with_capacity(0);
359 }
360
361 let mut lines: Vec<String> = text
362 .split_inclusive('\n')
363 .map(|line| line.to_string())
364 .collect();
365
366 if lines.is_empty() {
367 lines.push(text.to_string());
368 }
369
370 lines
371}
372
373#[inline]
374fn collect_line_records<'a>(
375 old_lines: &'a [&'a str],
376 new_lines: &'a [&'a str],
377) -> Vec<LineRecord<'a>> {
378 let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
379 let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
380 let mut old_index = 0u32;
381 let mut new_index = 0u32;
382
383 for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
384 match chunk {
385 Chunk::Equal(text) => {
386 for _ in text.chars() {
387 let old_line = old_index + 1;
388 let new_line = new_index + 1;
389 let line = old_lines[old_index as usize];
390 records.push(LineRecord {
391 kind: DiffLineKind::Context,
392 old_line: Some(old_line),
393 new_line: Some(new_line),
394 text: line,
395 anchor_old: old_line,
396 anchor_new: new_line,
397 });
398 old_index += 1;
399 new_index += 1;
400 }
401 }
402 Chunk::Delete(text) => {
403 for _ in text.chars() {
404 let old_line = old_index + 1;
405 let anchor_new = new_index + 1;
406 let line = old_lines[old_index as usize];
407 records.push(LineRecord {
408 kind: DiffLineKind::Deletion,
409 old_line: Some(old_line),
410 new_line: None,
411 text: line,
412 anchor_old: old_line,
413 anchor_new,
414 });
415 old_index += 1;
416 }
417 }
418 Chunk::Insert(text) => {
419 for _ in text.chars() {
420 let new_line = new_index + 1;
421 let anchor_old = old_index + 1;
422 let line = new_lines[new_index as usize];
423 records.push(LineRecord {
424 kind: DiffLineKind::Addition,
425 old_line: None,
426 new_line: Some(new_line),
427 text: line,
428 anchor_old,
429 anchor_new: new_line,
430 });
431 new_index += 1;
432 }
433 }
434 }
435 }
436
437 records
438}
439
440fn encode_line_sequences<'a>(
441 old_lines: &'a [&'a str],
442 new_lines: &'a [&'a str],
443) -> (String, String) {
444 let mut token_map: HashMap<&'a str, char> = HashMap::new();
445 let mut next_codepoint: u32 = 0;
446
447 let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
448 let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
449
450 (old_encoded, new_encoded)
451}
452
453fn encode_line_list<'a>(
454 lines: &'a [&'a str],
455 map: &mut HashMap<&'a str, char>,
456 next_codepoint: &mut u32,
457) -> String {
458 let mut encoded = String::with_capacity(lines.len());
459 for &line in lines {
460 let token = if let Some(&value) = map.get(line) {
461 value
462 } else {
463 let Some(ch) = next_token_char(next_codepoint) else {
464 break;
465 };
466 map.insert(line, ch);
467 ch
468 };
469 encoded.push(token);
470 }
471 encoded
472}
473
474fn next_token_char(counter: &mut u32) -> Option<char> {
475 while *counter <= 0x10FFFF {
476 let candidate = *counter;
477 *counter += 1;
478 if (0xD800..=0xDFFF).contains(&candidate) {
479 continue;
480 }
481 if let Some(ch) = char::from_u32(candidate) {
482 return Some(ch);
483 }
484 }
485 None
486}
487
488#[derive(Debug)]
489struct LineRecord<'a> {
490 kind: DiffLineKind,
491 old_line: Option<u32>,
492 new_line: Option<u32>,
493 text: &'a str,
494 anchor_old: u32,
495 anchor_new: u32,
496}
497
498fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
499 if records.is_empty() {
500 return Vec::new();
501 }
502
503 let ranges = compute_hunk_ranges(records, context);
504 let mut hunks = Vec::with_capacity(ranges.len());
505
506 for (start, end) in ranges {
507 let slice = &records[start..=end];
508
509 let old_start = slice
510 .iter()
511 .filter_map(|r| r.old_line)
512 .min()
513 .or_else(|| slice.iter().map(|r| r.anchor_old).min())
514 .unwrap_or(1) as usize;
515
516 let new_start = slice
517 .iter()
518 .filter_map(|r| r.new_line)
519 .min()
520 .or_else(|| slice.iter().map(|r| r.anchor_new).min())
521 .unwrap_or(1) as usize;
522
523 let old_lines = slice
524 .iter()
525 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
526 .count();
527 let new_lines = slice
528 .iter()
529 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
530 .count();
531
532 let lines = slice
533 .iter()
534 .map(|record| DiffLine {
535 kind: record.kind,
536 old_line: record.old_line,
537 new_line: record.new_line,
538 text: record.text.to_string(),
539 })
540 .collect();
541
542 hunks.push(DiffHunk {
543 old_start,
544 old_lines,
545 new_start,
546 new_lines,
547 lines,
548 });
549 }
550
551 hunks
552}
553
554fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
555 let mut ranges = Vec::with_capacity(4);
556 let mut current_start: Option<usize> = None;
557 let mut current_end: usize = 0;
558
559 for (idx, record) in records.iter().enumerate() {
560 if record.kind != DiffLineKind::Context {
561 let start = idx.saturating_sub(context);
562 let end = min(idx + context, records.len().saturating_sub(1));
563
564 if let Some(existing_start) = current_start {
565 if start < existing_start {
566 current_start = Some(start);
567 }
568 if end > current_end {
569 current_end = end;
570 }
571 } else {
572 current_start = Some(start);
573 current_end = end;
574 }
575 } else if let Some(start) = current_start
576 && idx > current_end
577 {
578 ranges.push((start, current_end));
579 current_start = None;
580 }
581 }
582
583 if let Some(start) = current_start {
584 ranges.push((start, current_end));
585 }
586
587 ranges
588}