1use hashbrown::HashMap;
4use serde::Serialize;
5use std::cmp::min;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Chunk<'a> {
10 Equal(&'a str),
11 Delete(&'a str),
12 Insert(&'a str),
13}
14
15pub fn compute_diff_chunks<'a>(old: &'a str, new: &'a str) -> Vec<Chunk<'a>> {
17 if old.is_empty() && new.is_empty() {
18 return Vec::with_capacity(0);
19 }
20 if old.is_empty() {
21 return vec![Chunk::Insert(new)];
22 }
23 if new.is_empty() {
24 return vec![Chunk::Delete(old)];
25 }
26
27 let mut prefix_len = 0;
29 for (o, n) in old.chars().zip(new.chars()) {
30 if o == n {
31 prefix_len += o.len_utf8();
32 } else {
33 break;
34 }
35 }
36
37 let mut suffix_len = 0;
38 let old_rest = &old[prefix_len..];
39 let new_rest = &new[prefix_len..];
40
41 let old_chars: Vec<char> = old_rest.chars().collect();
42 let new_chars: Vec<char> = new_rest.chars().collect();
43
44 for (o, n) in old_chars.iter().rev().zip(new_chars.iter().rev()) {
45 if o == n {
46 suffix_len += o.len_utf8();
47 } else {
48 break;
49 }
50 }
51
52 let old_middle_end = old_rest.len() - suffix_len;
53 let new_middle_end = new_rest.len() - suffix_len;
54
55 let old_middle = &old_rest[..old_middle_end];
56 let new_middle = &new_rest[..new_middle_end];
57
58 let mut result = Vec::with_capacity(old_middle.len() + new_middle.len());
59
60 if prefix_len > 0 {
62 result.push(Chunk::Equal(&old[..prefix_len]));
63 }
64
65 if !old_middle.is_empty() || !new_middle.is_empty() {
67 let old_chars: Vec<char> = old_middle.chars().collect();
68 let new_chars: Vec<char> = new_middle.chars().collect();
69 let old_byte_starts: Vec<usize> = old_middle.char_indices().map(|(idx, _)| idx).collect();
70 let new_byte_starts: Vec<usize> = new_middle.char_indices().map(|(idx, _)| idx).collect();
71 let edits = myers_diff(&old_chars, &new_chars);
72
73 let mut old_pos = 0;
74 let mut new_pos = 0;
75
76 for edit in edits {
77 match edit {
78 Edit::Equal => {
79 old_pos += 1;
80 new_pos += 1;
81 }
82 Edit::Delete => {
83 let Some(ch) = old_chars.get(old_pos).copied() else {
84 break;
85 };
86 let Some(byte_start) = old_byte_starts.get(old_pos).copied() else {
87 break;
88 };
89 let byte_end = byte_start + ch.len_utf8();
90 result.push(Chunk::Delete(&old_middle[byte_start..byte_end]));
91 old_pos += 1;
92 }
93 Edit::Insert => {
94 let Some(ch) = new_chars.get(new_pos).copied() else {
95 break;
96 };
97 let Some(byte_start) = new_byte_starts.get(new_pos).copied() else {
98 break;
99 };
100 let byte_end = byte_start + ch.len_utf8();
101 result.push(Chunk::Insert(&new_middle[byte_start..byte_end]));
102 new_pos += 1;
103 }
104 }
105 }
106 }
107
108 if suffix_len > 0 {
110 result.push(Chunk::Equal(&old[old.len() - suffix_len..]));
111 }
112
113 result
114}
115
116#[derive(Debug, Clone, Copy, PartialEq, Eq)]
117enum Edit {
118 Equal,
119 Delete,
120 Insert,
121}
122
123fn myers_diff(old: &[char], new: &[char]) -> Vec<Edit> {
124 let n = old.len();
125 let m = new.len();
126
127 if n == 0 {
128 return vec![Edit::Insert; m];
129 }
130 if m == 0 {
131 return vec![Edit::Delete; n];
132 }
133
134 let max_d = n + m;
135 let max_d_i32 = max_d as i32;
136 let mut v = vec![0; 2 * max_d + 1];
137 let mut v_index = vec![0usize; (max_d + 1) * (2 * max_d + 1)];
138 let row_len = 2 * max_d + 1;
139
140 v[max_d] = 0;
141
142 for d in 0..=max_d {
143 let d_i32 = d as i32;
144 let row_start = d * row_len;
145 for k in (-d_i32..=d_i32).step_by(2) {
146 let k_idx = (k + max_d_i32) as usize;
147
148 let x = if k == -d_i32 || (k != d_i32 && v[k_idx - 1] < v[k_idx + 1]) {
149 v[k_idx + 1]
150 } else {
151 v[k_idx - 1] + 1
152 };
153
154 let mut x = x;
155 let mut y = (x as i32 - k) as usize;
156
157 while x < n && y < m && old[x] == new[y] {
158 x += 1;
159 y += 1;
160 }
161
162 v[k_idx] = x;
163 v_index[row_start + k_idx] = x;
164
165 if x >= n && y >= m {
166 return backtrack_myers(old, new, &v_index, d, k, max_d);
167 }
168 }
169 }
170
171 vec![]
172}
173
174fn backtrack_myers(
175 old: &[char],
176 new: &[char],
177 v_index: &[usize],
178 d: usize,
179 mut k: i32,
180 max_d: usize,
181) -> Vec<Edit> {
182 let mut edits = Vec::with_capacity(old.len() + new.len());
183 let mut x = old.len();
184 let mut y = new.len();
185 let max_d_i32 = max_d as i32;
186 let row_len = 2 * max_d + 1;
187
188 for cur_d in (0..=d).rev() {
189 if cur_d == 0 {
190 while x > 0 && y > 0 {
191 edits.push(Edit::Equal);
192 x -= 1;
193 y -= 1;
194 }
195 break;
196 }
197
198 let k_idx = (k + max_d_i32) as usize;
199 let prev_row_start = (cur_d - 1) * row_len;
200
201 let prev_k = if k == -(cur_d as i32)
202 || (k != cur_d as i32
203 && v_index[prev_row_start + k_idx - 1] < v_index[prev_row_start + k_idx + 1])
204 {
205 k + 1
206 } else {
207 k - 1
208 };
209
210 let prev_k_idx = (prev_k + max_d_i32) as usize;
211 let prev_x_val = v_index[prev_row_start + prev_k_idx];
212 let prev_y = (prev_x_val as i32 - prev_k) as usize;
213
214 let (move_x, move_y) = if prev_k == k + 1 {
215 (prev_x_val, prev_y + 1)
216 } else {
217 (prev_x_val + 1, prev_y)
218 };
219
220 while x > move_x && y > move_y {
221 edits.push(Edit::Equal);
222 x -= 1;
223 y -= 1;
224 }
225
226 if prev_k == k + 1 {
227 edits.push(Edit::Insert);
228 y -= 1;
229 } else {
230 edits.push(Edit::Delete);
231 x -= 1;
232 }
233
234 k = prev_k;
235 }
236
237 edits.reverse();
238 edits
239}
240
241#[derive(Debug, Clone)]
243pub struct DiffOptions<'a> {
244 pub context_lines: usize,
245 pub old_label: Option<&'a str>,
246 pub new_label: Option<&'a str>,
247 pub missing_newline_hint: bool,
248}
249
250impl Default for DiffOptions<'_> {
251 fn default() -> Self {
252 Self {
253 context_lines: 3,
254 old_label: None,
255 new_label: None,
256 missing_newline_hint: true,
257 }
258 }
259}
260
261#[derive(Debug, Clone, Serialize)]
263pub struct DiffBundle {
264 pub hunks: Vec<DiffHunk>,
265 pub formatted: String,
266 pub is_empty: bool,
267}
268
269#[derive(Debug, Clone, Serialize)]
271pub struct DiffHunk {
272 pub old_start: usize,
273 pub old_lines: usize,
274 pub new_start: usize,
275 pub new_lines: usize,
276 pub lines: Vec<DiffLine>,
277}
278
279#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
281#[serde(rename_all = "snake_case")]
282pub enum DiffLineKind {
283 Context,
284 Addition,
285 Deletion,
286}
287
288#[derive(Debug, Clone, Serialize)]
290pub struct DiffLine {
291 pub kind: DiffLineKind,
292 pub old_line: Option<usize>,
293 pub new_line: Option<usize>,
294 pub text: String,
295}
296
297pub fn compute_diff<F>(old: &str, new: &str, options: DiffOptions<'_>, formatter: F) -> DiffBundle
299where
300 F: FnOnce(&[DiffHunk], &DiffOptions<'_>) -> String,
301{
302 let old_lines_owned = split_lines_with_terminator(old);
303 let new_lines_owned = split_lines_with_terminator(new);
304
305 let old_refs: Vec<&str> = old_lines_owned.iter().map(|s| s.as_str()).collect();
306 let new_refs: Vec<&str> = new_lines_owned.iter().map(|s| s.as_str()).collect();
307
308 let records = collect_line_records(&old_refs, &new_refs);
309 let has_changes = records
310 .iter()
311 .any(|record| matches!(record.kind, DiffLineKind::Addition | DiffLineKind::Deletion));
312
313 let hunks = if has_changes {
314 build_hunks(&records, options.context_lines)
315 } else {
316 Vec::new()
317 };
318
319 let formatted = if hunks.is_empty() {
320 String::new()
321 } else {
322 formatter(&hunks, &options)
323 };
324
325 DiffBundle {
326 hunks,
327 formatted,
328 is_empty: !has_changes,
329 }
330}
331
332fn split_lines_with_terminator(text: &str) -> Vec<String> {
333 if text.is_empty() {
334 return Vec::with_capacity(0);
335 }
336
337 let mut lines: Vec<String> = text
338 .split_inclusive('\n')
339 .map(|line| line.to_string())
340 .collect();
341
342 if lines.is_empty() {
343 lines.push(text.to_string());
344 }
345
346 lines
347}
348
349fn collect_line_records<'a>(
350 old_lines: &'a [&'a str],
351 new_lines: &'a [&'a str],
352) -> Vec<LineRecord<'a>> {
353 let (old_encoded, new_encoded) = encode_line_sequences(old_lines, new_lines);
354 let mut records = Vec::with_capacity(old_lines.len() + new_lines.len());
355 let mut old_index = 0usize;
356 let mut new_index = 0usize;
357
358 for chunk in compute_diff_chunks(old_encoded.as_str(), new_encoded.as_str()) {
359 match chunk {
360 Chunk::Equal(text) => {
361 for _ in text.chars() {
362 let old_line = old_index + 1;
363 let new_line = new_index + 1;
364 let line = old_lines[old_index];
365 records.push(LineRecord {
366 kind: DiffLineKind::Context,
367 old_line: Some(old_line),
368 new_line: Some(new_line),
369 text: line,
370 anchor_old: old_line,
371 anchor_new: new_line,
372 });
373 old_index += 1;
374 new_index += 1;
375 }
376 }
377 Chunk::Delete(text) => {
378 for _ in text.chars() {
379 let old_line = old_index + 1;
380 let anchor_new = new_index + 1;
381 let line = old_lines[old_index];
382 records.push(LineRecord {
383 kind: DiffLineKind::Deletion,
384 old_line: Some(old_line),
385 new_line: None,
386 text: line,
387 anchor_old: old_line,
388 anchor_new,
389 });
390 old_index += 1;
391 }
392 }
393 Chunk::Insert(text) => {
394 for _ in text.chars() {
395 let new_line = new_index + 1;
396 let anchor_old = old_index + 1;
397 let line = new_lines[new_index];
398 records.push(LineRecord {
399 kind: DiffLineKind::Addition,
400 old_line: None,
401 new_line: Some(new_line),
402 text: line,
403 anchor_old,
404 anchor_new: new_line,
405 });
406 new_index += 1;
407 }
408 }
409 }
410 }
411
412 records
413}
414
415fn encode_line_sequences<'a>(
416 old_lines: &'a [&'a str],
417 new_lines: &'a [&'a str],
418) -> (String, String) {
419 let mut token_map: HashMap<&'a str, char> = HashMap::new();
420 let mut next_codepoint: u32 = 0;
421
422 let old_encoded = encode_line_list(old_lines, &mut token_map, &mut next_codepoint);
423 let new_encoded = encode_line_list(new_lines, &mut token_map, &mut next_codepoint);
424
425 (old_encoded, new_encoded)
426}
427
428fn encode_line_list<'a>(
429 lines: &'a [&'a str],
430 map: &mut HashMap<&'a str, char>,
431 next_codepoint: &mut u32,
432) -> String {
433 let mut encoded = String::with_capacity(lines.len());
434 for &line in lines {
435 let token = if let Some(&value) = map.get(line) {
436 value
437 } else {
438 let Some(ch) = next_token_char(next_codepoint) else {
439 break;
440 };
441 map.insert(line, ch);
442 ch
443 };
444 encoded.push(token);
445 }
446 encoded
447}
448
449fn next_token_char(counter: &mut u32) -> Option<char> {
450 while *counter <= 0x10FFFF {
451 let candidate = *counter;
452 *counter += 1;
453 if (0xD800..=0xDFFF).contains(&candidate) {
454 continue;
455 }
456 if let Some(ch) = char::from_u32(candidate) {
457 return Some(ch);
458 }
459 }
460 None
461}
462
463#[derive(Debug)]
464struct LineRecord<'a> {
465 kind: DiffLineKind,
466 old_line: Option<usize>,
467 new_line: Option<usize>,
468 text: &'a str,
469 anchor_old: usize,
470 anchor_new: usize,
471}
472
473fn build_hunks(records: &[LineRecord<'_>], context: usize) -> Vec<DiffHunk> {
474 if records.is_empty() {
475 return Vec::new();
476 }
477
478 let ranges = compute_hunk_ranges(records, context);
479 let mut hunks = Vec::with_capacity(ranges.len());
480
481 for (start, end) in ranges {
482 let slice = &records[start..=end];
483
484 let old_start = slice
485 .iter()
486 .filter_map(|r| r.old_line)
487 .min()
488 .or_else(|| slice.iter().map(|r| r.anchor_old).min())
489 .unwrap_or(1);
490
491 let new_start = slice
492 .iter()
493 .filter_map(|r| r.new_line)
494 .min()
495 .or_else(|| slice.iter().map(|r| r.anchor_new).min())
496 .unwrap_or(1);
497
498 let old_lines = slice
499 .iter()
500 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Deletion))
501 .count();
502 let new_lines = slice
503 .iter()
504 .filter(|r| matches!(r.kind, DiffLineKind::Context | DiffLineKind::Addition))
505 .count();
506
507 let lines = slice
508 .iter()
509 .map(|record| DiffLine {
510 kind: record.kind.clone(),
511 old_line: record.old_line,
512 new_line: record.new_line,
513 text: record.text.to_string(),
514 })
515 .collect();
516
517 hunks.push(DiffHunk {
518 old_start,
519 old_lines,
520 new_start,
521 new_lines,
522 lines,
523 });
524 }
525
526 hunks
527}
528
529fn compute_hunk_ranges(records: &[LineRecord<'_>], context: usize) -> Vec<(usize, usize)> {
530 let mut ranges = Vec::with_capacity(4);
531 let mut current_start: Option<usize> = None;
532 let mut current_end: usize = 0;
533
534 for (idx, record) in records.iter().enumerate() {
535 if record.kind != DiffLineKind::Context {
536 let start = idx.saturating_sub(context);
537 let end = min(idx + context, records.len().saturating_sub(1));
538
539 if let Some(existing_start) = current_start {
540 if start < existing_start {
541 current_start = Some(start);
542 }
543 if end > current_end {
544 current_end = end;
545 }
546 } else {
547 current_start = Some(start);
548 current_end = end;
549 }
550 } else if let Some(start) = current_start
551 && idx > current_end
552 {
553 ranges.push((start, current_end));
554 current_start = None;
555 }
556 }
557
558 if let Some(start) = current_start {
559 ranges.push((start, current_end));
560 }
561
562 ranges
563}