Skip to main content

context_builder/
diff.rs

1use similar::{ChangeTag, TextDiff};
2use std::collections::HashMap;
3
/// Line based diff utilities.
///
/// This module originally exposed only `generate_diff`, which produces a
/// single "## File Differences" section for an entire markdown document.
/// That approach made it easy for volatile sections (timestamps, file tree
/// structure, etc.) to create noisy diffs. To address this, the new
/// per-file API lets the caller diff only the normalized *file content*
/// blocks that appear under each ``### File: `path` `` heading in the
/// canonical output, completely ignoring the global header and the file
/// tree portion. Each file receives an isolated unified-style diff.
///
/// High level additions:
/// * `PerFileStatus` – classification of the change.
/// * `PerFileDiff` – structured diff result for a single file.
/// * `diff_file_contents` – core engine producing diffs per file without any
///   global "## File Differences" header.
/// * `render_per_file_diffs` – helper to render the per-file diffs into
///   markdown (still omits a global header so the caller can choose).
///
/// Backwards compatibility: the existing `generate_diff` function (full
/// document diff) is retained for now. New code should prefer the
/// per-file functions.
///
/// Resolve how many context lines each diff hunk should carry.
///
/// Precedence, first match wins:
/// 1. `explicit`, when it is `Some` and greater than zero;
/// 2. the `CB_DIFF_CONTEXT_LINES` environment variable, when it parses as a
///    `usize` greater than zero;
/// 3. the built-in default of `3`.
///
/// A value of zero from either source is treated as "not configured".
fn resolve_context_lines(explicit: Option<usize>) -> usize {
    const DEFAULT_CONTEXT_LINES: usize = 3;

    // An explicit, positive request always wins.
    if let Some(requested) = explicit {
        if requested > 0 {
            return requested;
        }
    }

    // Otherwise consult the environment; unset, unparsable, or zero values
    // all fall through to the default.
    match std::env::var("CB_DIFF_CONTEXT_LINES") {
        Ok(raw) => match raw.parse::<usize>() {
            Ok(parsed) if parsed > 0 => parsed,
            _ => DEFAULT_CONTEXT_LINES,
        },
        Err(_) => DEFAULT_CONTEXT_LINES,
    }
}
38
39/// Original API: produce a single markdown section headed by "## File Differences".
40/// (Kept unchanged for compatibility.)
41pub fn generate_diff(old_content: &str, new_content: &str) -> String {
42    let diff = TextDiff::from_lines(old_content, new_content);
43    if diff.ratio() == 1.0 {
44        return String::new();
45    }
46    let context_lines = resolve_context_lines(None);
47    let grouped = diff.grouped_ops(context_lines);
48    let mut out = String::new();
49    out.push_str("## File Differences\n\n");
50    out.push_str("```diff\n");
51    for (group_index, group) in grouped.iter().enumerate() {
52        if group_index > 0 {
53            out.push_str("  ...\n");
54        }
55        // Emit standard unified diff hunk header for positional context
56        if let (Some(first), Some(last)) = (group.first(), group.last()) {
57            let old_start = first.old_range().start + 1;
58            let old_len = last.old_range().end - first.old_range().start;
59            let new_start = first.new_range().start + 1;
60            let new_len = last.new_range().end - first.new_range().start;
61            out.push_str(&format!(
62                "@@ -{},{} +{},{} @@\n",
63                old_start, old_len, new_start, new_len
64            ));
65        }
66        for op in group {
67            for change in diff.iter_changes(op) {
68                let tag = change.tag();
69                let mut line = change.to_string();
70                if line.ends_with('\n') {
71                    line.pop();
72                    if line.ends_with('\r') {
73                        line.pop();
74                    }
75                }
76
77                match tag {
78                    ChangeTag::Delete => {
79                        out.push_str("- ");
80                        out.push_str(&line);
81                        out.push('\n');
82                    }
83                    ChangeTag::Insert => {
84                        out.push_str("+ ");
85                        out.push_str(&line);
86                        out.push('\n');
87                    }
88                    ChangeTag::Equal => {
89                        out.push_str("  ");
90                        out.push_str(&line);
91                        out.push('\n');
92                    }
93                }
94            }
95        }
96    }
97    out.push_str("```\n\n");
98    out
99}
100
/// Classification of how a file changed between two snapshots.
///
/// The enum carries no data, so it is `Copy` and can be used as a hash key
/// (for example to count files per status).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PerFileStatus {
    /// Present only in the current snapshot.
    Added,
    /// Present only in the previous snapshot.
    Removed,
    /// Present in both snapshots with different content.
    Modified,
    /// Present in both snapshots with identical content.
    Unchanged,
}
109
110/// Structured diff result for a single file.
111#[derive(Debug, Clone)]
112pub struct PerFileDiff {
113    pub path: String,
114    pub status: PerFileStatus,
115    /// Unified diff fenced in ```diff (omitted when status == Unchanged and skip_unchanged=true)
116    pub diff: String,
117}
118
119impl PerFileDiff {
120    pub fn is_changed(&self) -> bool {
121        self.status != PerFileStatus::Unchanged
122    }
123}
124
125/// Produce a unified style diff for two text blobs WITHOUT adding any global
126/// section header. Returns empty string if contents are identical.
127fn unified_no_header(old: &str, new: &str, context_lines: usize) -> String {
128    let diff = TextDiff::from_lines(old, new);
129    if diff.ratio() == 1.0 {
130        return String::new();
131    }
132    let grouped = diff.grouped_ops(context_lines);
133    let mut out = String::new();
134    out.push_str("```diff\n");
135    for (group_index, group) in grouped.iter().enumerate() {
136        if group_index > 0 {
137            out.push_str("  ...\n");
138        }
139        // Emit standard unified diff hunk header for positional context
140        if let (Some(first), Some(last)) = (group.first(), group.last()) {
141            let old_start = first.old_range().start + 1;
142            let old_len = last.old_range().end - first.old_range().start;
143            let new_start = first.new_range().start + 1;
144            let new_len = last.new_range().end - first.new_range().start;
145            out.push_str(&format!(
146                "@@ -{},{} +{},{} @@\n",
147                old_start, old_len, new_start, new_len
148            ));
149        }
150        for op in group {
151            for change in diff.iter_changes(op) {
152                let tag = change.tag();
153                let mut line = change.to_string();
154                if line.ends_with('\n') {
155                    line.pop();
156                    if line.ends_with('\r') {
157                        line.pop();
158                    }
159                }
160
161                match tag {
162                    ChangeTag::Delete => {
163                        out.push_str("- ");
164                        out.push_str(&line);
165                        out.push('\n');
166                    }
167                    ChangeTag::Insert => {
168                        out.push_str("+ ");
169                        out.push_str(&line);
170                        out.push('\n');
171                    }
172                    ChangeTag::Equal => {
173                        out.push_str("  ");
174                        out.push_str(&line);
175                        out.push('\n');
176                    }
177                }
178            }
179        }
180    }
181    out.push_str("```\n");
182    out
183}
184
185/// Diff per file content sets.
186///
187/// Inputs are maps keyed by file path (relative or absolute – caller decides)
188/// with values being the raw file content EXACTLY as you wish it to be diffed
189/// (e.g. already stripped of volatile metadata, no size/modified lines, only
190/// the real file body). This keeps higher level logic (parsing the markdown
191/// document) out of the diff layer.
192///
193/// Returns a vector of `PerFileDiff` for every file that is Added, Removed,
194/// or Modified. Unchanged files are omitted by default (`skip_unchanged=true`)
195/// to reduce noise, but you can opt to include them.
196pub fn diff_file_contents(
197    previous: &HashMap<String, String>,
198    current: &HashMap<String, String>,
199    skip_unchanged: bool,
200    explicit_context: Option<usize>,
201) -> Vec<PerFileDiff> {
202    let mut all_paths: Vec<String> = previous.keys().chain(current.keys()).cloned().collect();
203    all_paths.sort();
204    all_paths.dedup();
205
206    let context_lines = resolve_context_lines(explicit_context);
207    let mut results = Vec::new();
208
209    for path in all_paths {
210        let old_opt = previous.get(&path);
211        let new_opt = current.get(&path);
212        match (old_opt, new_opt) {
213            (None, Some(new_content)) => {
214                // Added file: present only in current snapshot
215                let mut diff = String::new();
216                diff.push_str("```diff\n");
217                for line in new_content.lines() {
218                    diff.push_str("+ ");
219                    diff.push_str(line);
220                    diff.push('\n');
221                }
222                diff.push_str("```\n");
223                results.push(PerFileDiff {
224                    path,
225                    status: PerFileStatus::Added,
226                    diff,
227                });
228            }
229            (Some(_old_content), None) => {
230                // Removed file
231                let old_content = previous.get(&path).unwrap();
232                let mut diff = String::new();
233                diff.push_str("```diff\n");
234                for line in old_content.lines() {
235                    diff.push_str("- ");
236                    diff.push_str(line);
237                    diff.push('\n');
238                }
239                diff.push_str("```\n");
240                results.push(PerFileDiff {
241                    path,
242                    status: PerFileStatus::Removed,
243                    diff,
244                });
245            }
246            (Some(old_content), Some(new_content)) => {
247                if old_content == new_content {
248                    if !skip_unchanged {
249                        results.push(PerFileDiff {
250                            path,
251                            status: PerFileStatus::Unchanged,
252                            diff: String::new(),
253                        });
254                    }
255                } else {
256                    let diff = unified_no_header(old_content, new_content, context_lines);
257                    results.push(PerFileDiff {
258                        path,
259                        status: PerFileStatus::Modified,
260                        diff,
261                    });
262                }
263            }
264            (None, None) => unreachable!(),
265        }
266    }
267
268    results
269}
270
271/// Render a collection of per file diffs into markdown WITHOUT a global
272/// "## File Differences" header. Each file begins with a "### Diff: `<path>`"
273/// heading so that it can be appended near the changed files summary.
274pub fn render_per_file_diffs(diffs: &[PerFileDiff]) -> String {
275    let mut out = String::new();
276    for d in diffs {
277        out.push_str(&format!("### Diff: `{}`\n\n", d.path));
278        match d.status {
279            PerFileStatus::Added => out.push_str("_Status: Added_\n\n"),
280            PerFileStatus::Removed => out.push_str("_Status: Removed_\n\n"),
281            PerFileStatus::Modified => out.push_str("_Status: Modified_\n\n"),
282            PerFileStatus::Unchanged => {
283                out.push_str("_Status: Unchanged_\n\n");
284            }
285        }
286        if !d.diff.is_empty() {
287            out.push_str(&d.diff);
288            if !d.diff.ends_with('\n') {
289                out.push('\n');
290            }
291        }
292        out.push('\n');
293    }
294    out
295}
296
/// Unit tests for the diff utilities.
///
/// Tests that pass `None` (or zero) as the context override, and tests of
/// `generate_diff`, assume the `CB_DIFF_CONTEXT_LINES` environment variable is
/// not set in the test environment.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned path → content map from borrowed pairs.
    fn map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect()
    }

    #[test]
    fn unchanged_is_skipped() {
        let prev = map(&[("a.txt", "one\n")]);
        let curr = map(&[("a.txt", "one\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(2));
        assert!(diffs.is_empty());
    }

    #[test]
    fn added_file_diff() {
        let prev = map(&[]);
        let curr = map(&[("new.rs", "fn main() {}\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(2));
        assert_eq!(diffs.len(), 1);
        let d = &diffs[0];
        assert_eq!(d.status, PerFileStatus::Added);
        assert!(d.diff.contains("+ fn main() {}"));
    }

    #[test]
    fn removed_file_diff() {
        let prev = map(&[("old.rs", "fn old() {}\n")]);
        let curr = map(&[]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        let d = &diffs[0];
        assert_eq!(d.status, PerFileStatus::Removed);
        assert!(d.diff.contains("- fn old() {}"));
    }

    #[test]
    fn modified_file_diff() {
        let prev = map(&[("lib.rs", "fn add(a:i32,b:i32)->i32{a+b}\n")]);
        let curr = map(&[("lib.rs", "fn add(a: i32, b: i32) -> i32 { a + b }\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(1));
        assert_eq!(diffs.len(), 1);
        let d = &diffs[0];
        assert_eq!(d.status, PerFileStatus::Modified);
        assert!(d.diff.contains("- fn add(a:i32,b:i32)->i32{a+b}"));
        assert!(d.diff.contains("+ fn add(a: i32, b: i32) -> i32 { a + b }"));
    }

    #[test]
    fn include_unchanged_when_requested() {
        let prev = map(&[("a.txt", "same\n")]);
        let curr = map(&[("a.txt", "same\n")]);
        let diffs = diff_file_contents(&prev, &curr, false, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Unchanged);
        assert!(diffs[0].diff.is_empty());
    }

    #[test]
    fn render_output_basic() {
        let prev = map(&[("a.txt", "one\n"), ("b.txt", "line1\nline2\n")]);
        let curr = map(&[
            ("a.txt", "two\n"),
            ("b.txt", "line1\nline2\n"),
            ("c.txt", "new file\n"),
        ]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(1));
        let out = render_per_file_diffs(&diffs);
        assert!(out.contains("### Diff: `a.txt`"));
        assert!(out.contains("_Status: Modified_"));
        assert!(out.contains("+ two"));
        assert!(out.contains("### Diff: `c.txt`"));
        assert!(out.contains("_Status: Added_"));
        assert!(out.contains("+ new file"));
        // The unchanged file is skipped entirely.
        assert!(!out.contains("### Diff: `b.txt`"));
    }

    #[test]
    fn test_empty_files() {
        let prev = map(&[("empty.txt", "")]);
        let curr = map(&[("empty.txt", "")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert!(diffs.is_empty());
    }

    #[test]
    fn test_empty_to_content() {
        let prev = map(&[("file.txt", "")]);
        let curr = map(&[("file.txt", "new content\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("+ new content"));
    }

    #[test]
    fn test_content_to_empty() {
        let prev = map(&[("file.txt", "old content\n")]);
        let curr = map(&[("file.txt", "")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("- old content"));
    }

    #[test]
    fn test_multiline_modifications() {
        let prev = map(&[("file.txt", "line1\nline2\nline3\nline4\n")]);
        let curr = map(&[("file.txt", "line1\nmodified2\nline3\nline4\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(2));
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("- line2"));
        assert!(diffs[0].diff.contains("+ modified2"));
    }

    #[test]
    fn test_windows_line_endings() {
        let prev = map(&[("file.txt", "line1\r\nline2\r\n")]);
        let curr = map(&[("file.txt", "line1\r\nmodified2\r\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("- line2"));
        assert!(diffs[0].diff.contains("+ modified2"));
        // Carriage returns are stripped together with the line feed.
        assert!(!diffs[0].diff.contains('\r'));
    }

    #[test]
    fn test_per_file_diff_is_changed() {
        let added = PerFileDiff {
            path: "test.txt".to_string(),
            status: PerFileStatus::Added,
            diff: "test".to_string(),
        };
        assert!(added.is_changed());

        let removed = PerFileDiff {
            path: "test.txt".to_string(),
            status: PerFileStatus::Removed,
            diff: "test".to_string(),
        };
        assert!(removed.is_changed());

        let modified = PerFileDiff {
            path: "test.txt".to_string(),
            status: PerFileStatus::Modified,
            diff: "test".to_string(),
        };
        assert!(modified.is_changed());

        let unchanged = PerFileDiff {
            path: "test.txt".to_string(),
            status: PerFileStatus::Unchanged,
            diff: String::new(),
        };
        assert!(!unchanged.is_changed());
    }

    #[test]
    fn test_generate_diff_identical_content() {
        let content = "line1\nline2\nline3\n";
        let diff = generate_diff(content, content);
        assert!(diff.is_empty());
    }

    #[test]
    fn test_generate_diff_with_changes() {
        let old = "line1\nline2\nline3\n";
        let new = "line1\nmodified2\nline3\n";
        let diff = generate_diff(old, new);
        assert!(diff.contains("## File Differences"));
        assert!(diff.contains("```diff"));
        assert!(diff.contains("- line2"));
        assert!(diff.contains("+ modified2"));
    }

    #[test]
    fn test_resolve_context_lines_default() {
        // Assumes CB_DIFF_CONTEXT_LINES is not set in the test environment.
        let context = resolve_context_lines(None);
        assert_eq!(context, 3);
    }

    #[test]
    fn test_resolve_context_lines_explicit() {
        let context = resolve_context_lines(Some(5));
        assert_eq!(context, 5);
    }

    #[test]
    fn test_resolve_context_lines_zero_fallback() {
        // Assumes CB_DIFF_CONTEXT_LINES is not set in the test environment.
        let context = resolve_context_lines(Some(0));
        assert_eq!(context, 3); // Should fallback to default
    }

    #[test]
    fn test_unicode_content_diff() {
        let prev = map(&[("unicode.txt", "Hello 世界\n")]);
        let curr = map(&[("unicode.txt", "Hello 世界! 🌍\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("Hello 世界"));
        assert!(diffs[0].diff.contains("🌍"));
    }

    #[test]
    fn test_render_per_file_diffs_empty() {
        let diffs: Vec<PerFileDiff> = Vec::new();
        let output = render_per_file_diffs(&diffs);
        assert!(output.is_empty());
    }

    #[test]
    fn test_render_per_file_diffs_unchanged() {
        let diffs = vec![PerFileDiff {
            path: "unchanged.txt".to_string(),
            status: PerFileStatus::Unchanged,
            diff: String::new(),
        }];
        let output = render_per_file_diffs(&diffs);
        assert!(output.contains("### Diff: `unchanged.txt`"));
        assert!(output.contains("_Status: Unchanged_"));
    }

    #[test]
    fn test_render_per_file_diffs_without_trailing_newline() {
        let diffs = vec![PerFileDiff {
            path: "test.txt".to_string(),
            status: PerFileStatus::Modified,
            diff: "```diff\n+ line\n```".to_string(), // No trailing newline
        }];
        let output = render_per_file_diffs(&diffs);
        assert!(output.contains("### Diff: `test.txt`"));
        assert!(output.contains("_Status: Modified_"));
        assert!(output.ends_with("```\n\n")); // Newline added after the fence, then a blank line
    }

    #[test]
    fn test_generate_diff_with_multiple_groups() {
        // Changes on line 1 and line 9 are separated by seven unchanged lines,
        // which is more than twice the default context (3), so the diff is
        // split into two hunks.
        let old_content = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10";
        let new_content = "line1_modified\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9_modified\nline10";

        let diff = generate_diff(old_content, new_content);
        assert!(diff.contains("```diff"));
        assert!(diff.contains("## File Differences"));
        assert!(diff.contains("\n  ...\n"), "missing hunk separator:\n{}", diff);
        assert_eq!(diff.matches("@@ -").count(), 2, "unexpected hunks:\n{}", diff);
        assert!(diff.contains("@@ -1,4 +1,4 @@"));
        assert!(diff.contains("@@ -6,5 +6,5 @@"));
        // Lines in the middle of the unchanged run are outside both hunks.
        assert!(!diff.contains("  line5"));
    }

    #[test]
    fn test_diff_with_windows_line_endings() {
        let old_content = "line1\r\nline2\r\n";
        let new_content = "line1_modified\r\nline2\r\n";

        let diff = generate_diff(old_content, new_content);
        assert!(diff.contains("```diff"));
        assert!(diff.contains("line1_modified"));
        assert!(!diff.is_empty());
    }

    #[test]
    fn test_unified_no_header_with_multiple_groups() {
        // First and last lines change; the 19 unchanged lines between them
        // exceed twice the context (2), so two hunks are produced.
        let old_content = "start\n\n\n\n\n\n\n\n\n\nmiddle\n\n\n\n\n\n\n\n\n\nend";
        let new_content =
            "start_modified\n\n\n\n\n\n\n\n\n\nmiddle\n\n\n\n\n\n\n\n\n\nend_modified";

        let diff = unified_no_header(old_content, new_content, 2);
        assert!(diff.starts_with("```diff\n"));
        assert!(diff.ends_with("```\n"));
        assert!(diff.contains("\n  ...\n"), "missing hunk separator:\n{}", diff);
        assert_eq!(diff.matches("@@ -").count(), 2, "unexpected hunks:\n{}", diff);
        assert!(diff.contains("- start"));
        assert!(diff.contains("+ end_modified"));
        // "middle" sits far from both changes and must not appear as context.
        assert!(!diff.contains("middle"));
    }

    #[test]
    fn test_unified_no_header_with_windows_line_endings() {
        let old_content = "line1\r\nline2\r\n";
        let new_content = "line1_modified\r\nline2\r\n";

        let diff = unified_no_header(old_content, new_content, 3);
        assert!(diff.contains("```diff"));
        assert!(diff.contains("line1_modified"));
        assert!(!diff.is_empty());
    }
}