Skip to main content

fresh/model/
line_diff.rs

1//! Line-based diff algorithm for comparing saved vs current buffer content.
2//!
3//! This module provides a simple but robust diff algorithm that correctly handles
4//! insertions, deletions, and modifications. It uses a longest common subsequence (LCS)
5//! approach to identify which lines are unchanged, then marks the ranges that differ.
6
7use std::ops::Range;
8
9/// Type of change detected for a line range
10#[derive(Debug, Clone, Copy, PartialEq, Eq)]
11pub enum ChangeType {
12    /// Lines that were inserted (new lines not in saved)
13    Inserted,
14    /// Lines that were modified (same position, different content)
15    Modified,
16    /// Deletion marker (line after where content was deleted)
17    Deleted,
18}
19
20/// A range of lines with a specific change type
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct LineChange {
23    /// The range of line indices in the current buffer
24    pub range: Range<usize>,
25    /// What type of change this represents
26    pub change_type: ChangeType,
27}
28
29impl LineChange {
30    pub fn new(range: Range<usize>, change_type: ChangeType) -> Self {
31        Self { range, change_type }
32    }
33}
34
35/// Result of comparing two text buffers line by line.
36#[derive(Debug, Clone, PartialEq, Eq)]
37pub struct LineDiff {
38    /// Whether the two buffers are identical.
39    pub equal: bool,
40    /// Line ranges in the "current" buffer that differ from "saved".
41    /// These are the lines that should show modification indicators.
42    pub changed_lines: Vec<Range<usize>>,
43    /// Detailed changes with type information
44    pub changes: Vec<LineChange>,
45}
46
47/// Compare two byte slices line by line and return which lines in `current` differ from `saved`.
48///
49/// This uses the classic LCS (Longest Common Subsequence) algorithm which is the
50/// foundation of most diff tools including Unix `diff`. The algorithm:
51/// 1. Find the longest common subsequence of lines between saved and current
52/// 2. Lines in current not in the LCS are insertions/modifications
53/// 3. Lines in saved not in the LCS represent deletions (marked at deletion point)
54///
55/// This correctly handles insertions, deletions, and modifications without
56/// incorrectly marking shifted lines as changed.
57pub fn diff_lines(saved: &[u8], current: &[u8]) -> LineDiff {
58    let saved_lines: Vec<&[u8]> = saved.split(|&b| b == b'\n').collect();
59    let current_lines: Vec<&[u8]> = current.split(|&b| b == b'\n').collect();
60
61    // Quick check: if identical, return early
62    if saved == current {
63        return LineDiff {
64            equal: true,
65            changed_lines: vec![],
66            changes: vec![],
67        };
68    }
69
70    // Find LCS (longest common subsequence) of lines
71    let lcs = longest_common_subsequence(&saved_lines, &current_lines);
72
73    // Mark lines in current that are NOT part of the LCS as changed
74    // Also mark deletion points where saved lines were removed
75    let (changed_lines, changes) =
76        find_changed_lines_with_deletions(&saved_lines, &current_lines, &lcs);
77
78    LineDiff {
79        equal: changed_lines.is_empty() && changes.is_empty(),
80        changed_lines,
81        changes,
82    }
83}
84
85/// Represents a match between saved and current line indices
86#[derive(Debug, Clone, Copy)]
87struct LineMatch {
88    saved_idx: usize,
89    current_idx: usize,
90}
91
92/// Find the longest common subsequence of lines between saved and current.
93/// Returns a list of LineMatch with both saved and current indices.
94fn longest_common_subsequence(saved: &[&[u8]], current: &[&[u8]]) -> Vec<LineMatch> {
95    let n = saved.len();
96    let m = current.len();
97
98    if n == 0 || m == 0 {
99        return vec![];
100    }
101
102    // DP table for LCS length
103    // dp[i][j] = length of LCS of saved[0..i] and current[0..j]
104    let mut dp = vec![vec![0usize; m + 1]; n + 1];
105
106    for i in 1..=n {
107        for j in 1..=m {
108            if saved[i - 1] == current[j - 1] {
109                dp[i][j] = dp[i - 1][j - 1] + 1;
110            } else {
111                dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
112            }
113        }
114    }
115
116    // Backtrack to find the actual LCS
117    let mut lcs = Vec::new();
118    let mut i = n;
119    let mut j = m;
120
121    while i > 0 && j > 0 {
122        if saved[i - 1] == current[j - 1] {
123            lcs.push(LineMatch {
124                saved_idx: i - 1,
125                current_idx: j - 1,
126            });
127            i -= 1;
128            j -= 1;
129        } else if dp[i - 1][j] > dp[i][j - 1] {
130            i -= 1;
131        } else {
132            j -= 1;
133        }
134    }
135
136    lcs.reverse();
137    lcs
138}
139
140/// Given the LCS matches, find which lines in current are changed.
141/// This includes:
142/// - Lines in current that are not in the LCS (insertions/modifications)
143/// - Deletion points where saved lines were removed (marked at the line after deletion)
144///
145/// Returns both the simple ranges (for backward compatibility) and typed changes.
146fn find_changed_lines_with_deletions(
147    saved: &[&[u8]],
148    current: &[&[u8]],
149    lcs: &[LineMatch],
150) -> (Vec<Range<usize>>, Vec<LineChange>) {
151    let mut matched_in_current: Vec<bool> = vec![false; current.len()];
152    let mut matched_in_saved: Vec<bool> = vec![false; saved.len()];
153
154    for m in lcs {
155        matched_in_current[m.current_idx] = true;
156        matched_in_saved[m.saved_idx] = true;
157    }
158
159    let mut ranges = Vec::new();
160    let mut changes = Vec::new();
161
162    // Determine change types by analyzing the LCS alignment
163    // Build a map of which saved line each current line corresponds to
164    let mut current_to_saved: Vec<Option<usize>> = vec![None; current.len()];
165    for m in lcs {
166        current_to_saved[m.current_idx] = Some(m.saved_idx);
167    }
168
169    // Find insertions/modifications: lines in current not in LCS
170    let mut i = 0;
171    while i < current.len() {
172        if !matched_in_current[i] {
173            let start = i;
174            while i < current.len() && !matched_in_current[i] {
175                i += 1;
176            }
177            let range = start..i;
178
179            // Determine if this is an insertion or modification
180            // It's a modification if there's a corresponding saved line at the same position
181            // that was also not matched (i.e., both were changed)
182            let change_type = classify_change(start, i, saved.len(), current.len(), lcs);
183            changes.push(LineChange::new(range.clone(), change_type));
184            ranges.push(range);
185        } else {
186            i += 1;
187        }
188    }
189
190    // Find deletions: lines in saved not in LCS
191    // Mark the deletion point in current (the line after where deletion occurred)
192    let mut saved_idx = 0;
193    let mut current_idx = 0;
194    let mut lcs_idx = 0;
195
196    while saved_idx < saved.len() {
197        if lcs_idx < lcs.len() && lcs[lcs_idx].saved_idx == saved_idx {
198            // This saved line is matched
199            current_idx = lcs[lcs_idx].current_idx + 1;
200            saved_idx += 1;
201            lcs_idx += 1;
202        } else {
203            // This saved line was deleted - mark at current position
204            let deletion_line = if current_idx < current.len() {
205                current_idx
206            } else if !current.is_empty() {
207                current.len() - 1
208            } else {
209                0
210            };
211            let range = deletion_line..deletion_line + 1;
212            changes.push(LineChange::new(range.clone(), ChangeType::Deleted));
213            ranges.push(range);
214            saved_idx += 1;
215        }
216    }
217
218    // Sort changes by range start
219    changes.sort_by_key(|c| c.range.start);
220
221    // Merge ranges but keep changes separate (they have different types)
222    let merged_ranges = merge_ranges(ranges);
223
224    (merged_ranges, changes)
225}
226
227/// Classify a change as insertion or modification based on context
228fn classify_change(
229    start: usize,
230    end: usize,
231    saved_len: usize,
232    current_len: usize,
233    lcs: &[LineMatch],
234) -> ChangeType {
235    // If current is longer than saved, extra lines are insertions
236    if current_len > saved_len {
237        // Check if this range is beyond the original saved length
238        // by looking at where matched lines are
239        let last_matched_saved = lcs.iter().map(|m| m.saved_idx).max();
240        let last_matched_current = lcs.iter().map(|m| m.current_idx).max();
241
242        match (last_matched_saved, last_matched_current) {
243            (Some(ls), Some(lc)) if start > lc && start >= ls => {
244                // This is after all matched content - likely insertion
245                return ChangeType::Inserted;
246            }
247            _ => {}
248        }
249    }
250
251    // If there are matched lines before and after this range, it's likely an insertion
252    let has_match_before = lcs.iter().any(|m| m.current_idx < start);
253    let has_match_after = lcs.iter().any(|m| m.current_idx >= end);
254
255    if has_match_before && has_match_after {
256        // Lines in the middle between matched content - insertion
257        ChangeType::Inserted
258    } else if !has_match_before && has_match_after {
259        // At the beginning before any matches
260        if current_len > saved_len {
261            ChangeType::Inserted
262        } else {
263            ChangeType::Modified
264        }
265    } else if has_match_before && !has_match_after {
266        // At the end after all matches
267        if current_len > saved_len {
268            ChangeType::Inserted
269        } else {
270            ChangeType::Modified
271        }
272    } else {
273        // No matches at all - whole file changed
274        ChangeType::Modified
275    }
276}
277
278/// Merge adjacent or overlapping ranges.
279pub fn merge_ranges(ranges: Vec<Range<usize>>) -> Vec<Range<usize>> {
280    if ranges.is_empty() {
281        return ranges;
282    }
283
284    let mut sorted = ranges;
285    sorted.sort_by_key(|r| r.start);
286
287    let mut merged = Vec::new();
288    let mut current = sorted[0].clone();
289
290    for range in sorted.into_iter().skip(1) {
291        if range.start <= current.end {
292            current.end = current.end.max(range.end);
293        } else {
294            merged.push(current);
295            current = range;
296        }
297    }
298    merged.push(current);
299    merged
300}
301
302#[cfg(test)]
303mod tests {
304    use super::*;
305
306    #[test]
307    fn test_identical_content() {
308        let content = b"line 1\nline 2\nline 3\n";
309        let diff = diff_lines(content, content);
310        assert!(diff.equal);
311        assert!(diff.changed_lines.is_empty());
312    }
313
314    #[test]
315    fn test_empty_files() {
316        let diff = diff_lines(b"", b"");
317        assert!(diff.equal);
318        assert!(diff.changed_lines.is_empty());
319    }
320
321    #[test]
322    fn test_single_line_modification() {
323        let saved = b"line 1\nline 2\nline 3\n";
324        let current = b"line 1\nmodified\nline 3\n";
325        let diff = diff_lines(saved, current);
326
327        assert!(!diff.equal);
328        assert_eq!(diff.changed_lines, vec![1..2]);
329    }
330
331    #[test]
332    fn test_insert_line_at_beginning() {
333        let saved = b"line 1\nline 2\nline 3\n";
334        let current = b"new line\nline 1\nline 2\nline 3\n";
335        let diff = diff_lines(saved, current);
336
337        assert!(!diff.equal);
338        // Only the new line should be marked, not the shifted lines
339        assert_eq!(diff.changed_lines, vec![0..1]);
340    }
341
342    #[test]
343    fn test_insert_line_in_middle() {
344        let saved = b"line 1\nline 2\nline 3\n";
345        let current = b"line 1\nline 2\nnew line\nline 3\n";
346        let diff = diff_lines(saved, current);
347
348        assert!(!diff.equal);
349        // Only the new line should be marked
350        assert_eq!(diff.changed_lines, vec![2..3]);
351    }
352
353    #[test]
354    fn test_insert_line_at_end() {
355        let saved = b"line 1\nline 2\nline 3\n";
356        let current = b"line 1\nline 2\nline 3\nnew line\n";
357        let diff = diff_lines(saved, current);
358
359        assert!(!diff.equal);
360        // Only the new line should be marked
361        assert_eq!(diff.changed_lines, vec![3..4]);
362    }
363
364    #[test]
365    fn test_delete_line_from_beginning() {
366        let saved = b"line 1\nline 2\nline 3\n";
367        let current = b"line 2\nline 3\n";
368        let diff = diff_lines(saved, current);
369
370        assert!(!diff.equal);
371        // Deletion at beginning - mark line 0 as the deletion point
372        assert_eq!(diff.changed_lines, vec![0..1]);
373    }
374
375    #[test]
376    fn test_delete_line_from_middle() {
377        let saved = b"line 1\nline 2\nline 3\n";
378        let current = b"line 1\nline 3\n";
379        let diff = diff_lines(saved, current);
380
381        assert!(!diff.equal);
382        // Deletion after line 1 - mark line 1 as the deletion point
383        assert_eq!(diff.changed_lines, vec![1..2]);
384    }
385
386    #[test]
387    fn test_insert_newline_splits_line() {
388        // This is the key test case: inserting Enter at end of line 2
389        let saved = b"line 1\nline 2\nline 3\nline 4\nline 5\n";
390        let current = b"line 1\nline 2\n\nline 3\nline 4\nline 5\n";
391        let diff = diff_lines(saved, current);
392
393        assert!(!diff.equal);
394        // Only the new empty line should be marked (line index 2)
395        // Lines 3, 4, 5 should NOT be marked even though they shifted
396        assert_eq!(diff.changed_lines, vec![2..3]);
397    }
398
399    #[test]
400    fn test_multiple_insertions() {
401        let saved = b"a\nb\nc\n";
402        let current = b"a\nx\nb\ny\nc\n";
403        let diff = diff_lines(saved, current);
404
405        assert!(!diff.equal);
406        // Lines 1 (x) and 3 (y) are new
407        assert_eq!(diff.changed_lines, vec![1..2, 3..4]);
408    }
409
410    #[test]
411    fn test_multiple_deletions() {
412        let saved = b"a\nx\nb\ny\nc\n";
413        let current = b"a\nb\nc\n";
414        let diff = diff_lines(saved, current);
415
416        assert!(!diff.equal);
417        // Deletions after 'a' and after 'b' - mark lines 1 and 2
418        assert_eq!(diff.changed_lines, vec![1..3]);
419    }
420
421    #[test]
422    fn test_replace_all_content() {
423        let saved = b"old 1\nold 2\nold 3\n";
424        let current = b"new 1\nnew 2\n";
425        let diff = diff_lines(saved, current);
426
427        assert!(!diff.equal);
428        // All lines in current are new
429        assert_eq!(diff.changed_lines, vec![0..2]);
430    }
431
432    #[test]
433    fn test_content_restored_via_paste() {
434        // Simulates: cut "world", paste it back
435        let saved = b"hello world\n";
436        let current = b"hello world\n";
437        let diff = diff_lines(saved, current);
438
439        assert!(diff.equal);
440        assert!(diff.changed_lines.is_empty());
441    }
442
443    #[test]
444    fn test_interleaved_changes() {
445        let saved = b"a\nb\nc\nd\ne\n";
446        let current = b"a\nB\nc\nD\ne\n";
447        let diff = diff_lines(saved, current);
448
449        assert!(!diff.equal);
450        // Lines 1 (B) and 3 (D) are modified
451        assert_eq!(diff.changed_lines, vec![1..2, 3..4]);
452    }
453
454    #[test]
455    fn test_merge_adjacent_ranges() {
456        let ranges = vec![0..1, 1..2, 3..4];
457        let merged = merge_ranges(ranges);
458        assert_eq!(merged, vec![0..2, 3..4]);
459    }
460
461    #[test]
462    fn test_merge_overlapping_ranges() {
463        let ranges = vec![0..3, 2..5, 7..9];
464        let merged = merge_ranges(ranges);
465        assert_eq!(merged, vec![0..5, 7..9]);
466    }
467
468    #[test]
469    fn test_delete_at_end() {
470        let saved = b"line 1\nline 2\nline 3\n";
471        let current = b"line 1\nline 2\n";
472        let diff = diff_lines(saved, current);
473
474        assert!(!diff.equal);
475        // Deletion at end - mark last line as deletion point
476        assert!(!diff.changed_lines.is_empty());
477    }
478
479    #[test]
480    fn test_add_at_end_of_existing_line() {
481        // Adding text to end of a line (not a newline)
482        let saved = b"hello\n";
483        let current = b"hello world\n";
484        let diff = diff_lines(saved, current);
485
486        assert!(!diff.equal);
487        assert_eq!(diff.changed_lines, vec![0..1]);
488    }
489}
490
491#[cfg(test)]
492mod proptests {
493    use super::*;
494    use proptest::prelude::*;
495
496    /// Generate a simple multi-line string
497    fn multiline_string() -> impl Strategy<Value = Vec<u8>> {
498        prop::collection::vec("[a-z ]{0,20}", 0..10).prop_map(|lines| {
499            let joined = lines.join("\n");
500            joined.into_bytes()
501        })
502    }
503
504    proptest! {
505        /// Identical content should always produce equal=true
506        #[test]
507        fn identical_content_is_equal(content in multiline_string()) {
508            let diff = diff_lines(&content, &content);
509            prop_assert!(diff.equal);
510            prop_assert!(diff.changed_lines.is_empty());
511        }
512
513        /// Diff should be symmetric in terms of detecting changes
514        /// (though the specific changed_lines may differ)
515        #[test]
516        fn diff_detects_any_difference(
517            saved in multiline_string(),
518            current in multiline_string()
519        ) {
520            let diff = diff_lines(&saved, &current);
521            if saved == current {
522                prop_assert!(diff.equal);
523            } else {
524                prop_assert!(!diff.equal);
525            }
526        }
527
528        /// Inserting a single line should only mark that one line
529        #[test]
530        fn single_line_insert_marks_one_line(
531            prefix_lines in prop::collection::vec("[a-z]{1,10}", 0..5),
532            new_line in "[a-z]{1,10}",
533            suffix_lines in prop::collection::vec("[a-z]{1,10}", 0..5)
534        ) {
535            let saved_lines: Vec<String> = prefix_lines.iter()
536                .chain(suffix_lines.iter())
537                .cloned()
538                .collect();
539            let saved = saved_lines.join("\n").into_bytes();
540
541            let current_lines: Vec<String> = prefix_lines.iter()
542                .cloned()
543                .chain(std::iter::once(new_line))
544                .chain(suffix_lines.iter().cloned())
545                .collect();
546            let current = current_lines.join("\n").into_bytes();
547
548            let diff = diff_lines(&saved, &current);
549
550            // Should detect a change
551            prop_assert!(!diff.equal);
552
553            // Should only mark the inserted line (at position prefix_lines.len())
554            // The changed_lines should contain exactly one range of size 1
555            let total_changed: usize = diff.changed_lines.iter()
556                .map(|r| r.end - r.start)
557                .sum();
558            prop_assert_eq!(total_changed, 1, "Inserting one line should mark exactly one line");
559        }
560
561        /// Changed lines should always be valid indices in the current buffer
562        #[test]
563        fn changed_lines_are_valid_indices(
564            saved in multiline_string(),
565            current in multiline_string()
566        ) {
567            let diff = diff_lines(&saved, &current);
568            let current_line_count = current.split(|&b| b == b'\n').count();
569
570            for range in &diff.changed_lines {
571                prop_assert!(range.start < current_line_count || current_line_count == 0,
572                    "Range start {} should be < line count {}",
573                    range.start, current_line_count);
574                prop_assert!(range.end <= current_line_count || current_line_count == 0,
575                    "Range end {} should be <= line count {}",
576                    range.end, current_line_count);
577            }
578        }
579
580        /// Changed line ranges should not overlap and should be sorted
581        #[test]
582        fn changed_lines_are_sorted_and_non_overlapping(
583            saved in multiline_string(),
584            current in multiline_string()
585        ) {
586            let diff = diff_lines(&saved, &current);
587
588            for i in 1..diff.changed_lines.len() {
589                let prev = &diff.changed_lines[i - 1];
590                let curr = &diff.changed_lines[i];
591                prop_assert!(prev.end <= curr.start,
592                    "Ranges should not overlap: {:?} and {:?}", prev, curr);
593            }
594        }
595    }
596}