Skip to main content

cached_context/
diff.rs

1//! Diff module - computes unified diffs between old and new file content
2
3use similar::{ChangeTag, TextDiff};
4use std::collections::HashSet;
5
6/// Result of computing a diff between two file contents
7#[derive(Debug, Clone)]
8pub struct DiffResult {
9    /// Unified diff string
10    pub diff: String,
11    /// Whether there are any changes
12    pub has_changes: bool,
13    /// Number of lines changed (added + removed)
14    pub lines_changed: usize,
15    /// Line numbers in the NEW file that were added or modified
16    pub changed_new_lines: Vec<usize>,
17}
18
19/// Compute a unified diff between old and new content
20///
21/// # Arguments
22/// * `old_content` - The original file content
23/// * `new_content` - The new file content
24/// * `file_path` - Path to the file (used in diff headers)
25///
26/// # Returns
27/// A `DiffResult` containing the unified diff string and metadata about changes
28pub fn compute_diff(old_content: &str, new_content: &str, file_path: &str) -> DiffResult {
29    // Handle edge cases: empty files
30    if old_content.is_empty() && new_content.is_empty() {
31        return DiffResult {
32            diff: String::new(),
33            has_changes: false,
34            lines_changed: 0,
35            changed_new_lines: vec![],
36        };
37    }
38
39    // Create diff using similar crate
40    let diff = TextDiff::from_lines(old_content, new_content);
41
42    // Collect raw diff lines with their line numbers
43    let mut raw_lines: Vec<RawDiffLine> = vec![];
44    let mut old_line: usize = 1;
45    let mut new_line: usize = 1;
46    let mut lines_changed: usize = 0;
47
48    for change in diff.iter_all_changes() {
49        let tag = change.tag();
50        let content = change.value();
51
52        // Remove trailing newline for consistency
53        let line_content = content.trim_end_matches('\n');
54
55        match tag {
56            ChangeTag::Equal => {
57                raw_lines.push(RawDiffLine {
58                    line_type: LineType::Keep,
59                    content: line_content.to_string(),
60                    old_line,
61                    new_line,
62                });
63                old_line += 1;
64                new_line += 1;
65            }
66            ChangeTag::Delete => {
67                raw_lines.push(RawDiffLine {
68                    line_type: LineType::Remove,
69                    content: line_content.to_string(),
70                    old_line,
71                    new_line,
72                });
73                old_line += 1;
74                lines_changed += 1;
75            }
76            ChangeTag::Insert => {
77                raw_lines.push(RawDiffLine {
78                    line_type: LineType::Add,
79                    content: line_content.to_string(),
80                    old_line,
81                    new_line,
82                });
83                new_line += 1;
84                lines_changed += 1;
85            }
86        }
87    }
88
89    // Early return if no changes
90    if lines_changed == 0 {
91        return DiffResult {
92            diff: String::new(),
93            has_changes: false,
94            lines_changed: 0,
95            changed_new_lines: vec![],
96        };
97    }
98
99    // Collect which lines in the new file were changed
100    let mut changed_new_lines_set: HashSet<usize> = HashSet::new();
101    for rl in &raw_lines {
102        match rl.line_type {
103            LineType::Add => {
104                changed_new_lines_set.insert(rl.new_line);
105            }
106            LineType::Remove => {
107                // For removals, mark the adjacent new line as affected
108                if rl.new_line > 0 {
109                    changed_new_lines_set.insert(rl.new_line);
110                }
111            }
112            LineType::Keep => {}
113        }
114    }
115
116    // Group into hunks with 3 lines of context
117    let hunks = format_hunks(&raw_lines);
118
119    // Build the final diff string
120    let header = format!("--- a/{}\n+++ b/{}", file_path, file_path);
121    let diff = if hunks.is_empty() {
122        header
123    } else {
124        format!("{}\n{}", header, hunks)
125    };
126
127    // Sort the changed new lines
128    let mut changed_new_lines: Vec<usize> = changed_new_lines_set.into_iter().collect();
129    changed_new_lines.sort();
130
131    DiffResult {
132        diff,
133        has_changes: true,
134        lines_changed,
135        changed_new_lines,
136    }
137}
138
139/// Internal representation of a line in the diff
140#[derive(Debug, Clone, PartialEq)]
141struct RawDiffLine {
142    line_type: LineType,
143    content: String,
144    old_line: usize,
145    new_line: usize,
146}
147
148/// Type of diff line
149#[derive(Debug, Clone, PartialEq)]
150enum LineType {
151    Keep,
152    Add,
153    Remove,
154}
155
156/// Format raw diff lines into hunks with context
157fn format_hunks(raw_lines: &[RawDiffLine]) -> String {
158    const CONTEXT: usize = 3;
159    let mut hunks: Vec<String> = vec![];
160    let mut current_hunk: Vec<&RawDiffLine> = vec![];
161    let mut last_change_idx: isize = -999;
162    // Track the highest raw_lines index already added to the current hunk.
163    // This replaces the O(n) contains() check with an O(1) comparison,
164    // fixing the O(n^2) behavior for large diffs.
165    let mut max_added_idx: usize = 0;
166
167    for (i, line) in raw_lines.iter().enumerate() {
168        if line.line_type != LineType::Keep {
169            // If this change is far from the last, start a new hunk
170            if i as isize - last_change_idx > (CONTEXT * 2 + 1) as isize && !current_hunk.is_empty()
171            {
172                hunks.push(format_hunk(&current_hunk));
173                current_hunk = vec![];
174                // Add leading context
175                let ctx_start = i.saturating_sub(CONTEXT);
176                current_hunk.extend(&raw_lines[ctx_start..i]);
177                max_added_idx = i; // leading context covers up to i-1, next push is i
178            } else if current_hunk.is_empty() {
179                // First hunk, add leading context
180                let ctx_start = i.saturating_sub(CONTEXT);
181                current_hunk.extend(&raw_lines[ctx_start..i]);
182                max_added_idx = i; // leading context covers up to i-1, next push is i
183            }
184            // Add all lines between last change context end and this change.
185            // Use max_added_idx to skip lines already in the hunk (O(1) instead of O(n)).
186            let context_end = (last_change_idx as usize).saturating_add(CONTEXT + 1);
187            let fill_start = std::cmp::max(context_end, max_added_idx);
188            if fill_start < i {
189                current_hunk.extend(&raw_lines[fill_start..i]);
190            }
191            current_hunk.push(line);
192            max_added_idx = i + 1;
193            last_change_idx = i as isize;
194        } else if (i as isize - last_change_idx) <= (CONTEXT as isize) && !current_hunk.is_empty() {
195            current_hunk.push(line);
196            max_added_idx = i + 1;
197        }
198    }
199
200    if !current_hunk.is_empty() {
201        hunks.push(format_hunk(&current_hunk));
202    }
203
204    hunks.join("\n")
205}
206
207/// Format a single hunk
208fn format_hunk(lines: &[&RawDiffLine]) -> String {
209    if lines.is_empty() {
210        return String::new();
211    }
212
213    let first_line = lines.first().unwrap();
214
215    // Count lines present in each side of the diff.
216    // old_count = lines that exist in old file (Keep + Remove)
217    // new_count = lines that exist in new file (Keep + Add)
218    // Line-number arithmetic is incorrect for pure insertions/deletions because
219    // old_line is not incremented for Add entries and new_line is not incremented
220    // for Remove entries, causing the formula `last - first + 1` to return 1
221    // instead of 0 for the empty side.
222    let old_count = lines
223        .iter()
224        .filter(|l| l.line_type != LineType::Add)
225        .count();
226    let new_count = lines
227        .iter()
228        .filter(|l| l.line_type != LineType::Remove)
229        .count();
230
231    let mut hunk = format!(
232        "@@ -{},{} +{},{} @@",
233        first_line.old_line, old_count, first_line.new_line, new_count
234    );
235
236    for line in lines {
237        let prefix = match line.line_type {
238            LineType::Add => "+",
239            LineType::Remove => "-",
240            LineType::Keep => " ",
241        };
242        hunk.push_str(&format!("\n{}{}", prefix, line.content));
243    }
244
245    hunk
246}
247
248#[cfg(test)]
249mod tests {
250    use super::*;
251
252    #[test]
253    fn test_identical_content() {
254        let content = "line1\nline2\nline3\n";
255        let result = compute_diff(content, content, "test.txt");
256        assert!(!result.has_changes);
257        assert_eq!(result.lines_changed, 0);
258        assert!(result.changed_new_lines.is_empty());
259        assert!(result.diff.is_empty());
260    }
261
262    #[test]
263    fn test_empty_to_content() {
264        let old = "";
265        let new = "line1\nline2\n";
266        let result = compute_diff(old, new, "test.txt");
267        assert!(result.has_changes);
268        // similar crate uses -1,1 format for empty old content
269        assert!(result.diff.contains("+line1"));
270        assert!(result.diff.contains("+line2"));
271    }
272
273    #[test]
274    fn test_content_to_empty() {
275        let old = "line1\nline2\n";
276        let new = "";
277        let result = compute_diff(old, new, "test.txt");
278        assert!(result.has_changes);
279        // similar crate uses +1,0 format for empty new content
280        assert!(result.diff.contains("-line1"));
281        assert!(result.diff.contains("-line2"));
282    }
283
284    #[test]
285    fn test_single_line_change() {
286        let old = "hello world\n";
287        let new = "hello rust\n";
288        let result = compute_diff(old, new, "test.txt");
289        assert!(result.has_changes);
290        assert!(result.diff.contains("-hello world"));
291        assert!(result.diff.contains("+hello rust"));
292    }
293
294    #[test]
295    fn test_add_lines() {
296        let old = "line1\nline3\n";
297        let new = "line1\nline2\nline3\n";
298        let result = compute_diff(old, new, "test.txt");
299        assert!(result.has_changes);
300        assert!(result.diff.contains("+line2"));
301        // Line 2 is added, so it should be in changed_new_lines
302        assert!(result.changed_new_lines.contains(&2));
303    }
304
305    #[test]
306    fn test_remove_lines() {
307        let old = "line1\nline2\nline3\n";
308        let new = "line1\nline3\n";
309        let result = compute_diff(old, new, "test.txt");
310        assert!(result.has_changes);
311        assert!(result.diff.contains("-line2"));
312    }
313
314    #[test]
315    fn test_both_empty() {
316        let result = compute_diff("", "", "test.txt");
317        assert!(!result.has_changes);
318        assert_eq!(result.lines_changed, 0);
319    }
320
321    #[test]
322    fn test_hunk_header_pure_deletion() {
323        // All old lines deleted, nothing added — new side must show count 0
324        let result = compute_diff("line1\nline2\n", "", "test.txt");
325        assert!(result.has_changes);
326        assert!(
327            result.diff.contains("+1,0") || result.diff.contains("+0,0"),
328            "Pure deletion must show 0 new lines in @@ header, got:\n{}",
329            result.diff
330        );
331    }
332
333    #[test]
334    fn test_hunk_header_pure_insertion() {
335        // Nothing deleted, all new lines added — old side must show count 0
336        let result = compute_diff("", "line1\nline2\n", "test.txt");
337        assert!(result.has_changes);
338        assert!(
339            result.diff.contains("-1,0") || result.diff.contains("-0,0"),
340            "Pure insertion must show 0 old lines in @@ header, got:\n{}",
341            result.diff
342        );
343    }
344
345    #[test]
346    fn test_hunk_header_single_line_replacement() {
347        // 1 old line replaced by 1 new line — must be @@ -1,1 +1,1 @@
348        let result = compute_diff("hello world\n", "hello rust\n", "test.txt");
349        assert!(result.has_changes);
350        assert!(
351            result.diff.contains("-1,1") && result.diff.contains("+1,1"),
352            "1-for-1 replacement must produce @@ -1,1 +1,1 @@, got:\n{}",
353            result.diff
354        );
355    }
356
357    #[test]
358    fn test_changed_new_lines_tracking() {
359        let old = "a\nb\nc\nd\ne\n";
360        let new = "a\nb\nX\nd\nY\n";
361        let result = compute_diff(old, new, "test.txt");
362        assert!(result.has_changes);
363        // Lines 3 and 5 changed (new positions)
364        assert!(result.changed_new_lines.contains(&3));
365        assert!(result.changed_new_lines.contains(&5));
366    }
367}