context_builder/
diff.rs

1use similar::{ChangeTag, TextDiff};
2use std::collections::HashMap;
3
// Line-based diff utilities.
//
// (Plain `//` comment on purpose: this overview sits after the `use` items,
// so writing it as `///` would attach it to the first function below instead
// of documenting the module.)
//
// This module previously exposed `generate_diff` which produced a single
// "## File Differences" section for an entire markdown document. That
// approach made it easy for volatile sections (timestamps, file tree
// structure, etc.) to create noisy diffs. To address this the new
// per-file API lets the caller diff only the normalized *file content*
// blocks that appear under each "### File: `path`" heading in the
// canonical output, completely ignoring the global header or the file
// tree portion. Each file receives an isolated unified-style diff.
//
// High level additions:
// * `PerFileStatus` – classification of the change.
// * `PerFileDiff` – structured diff result for a single file.
// * `diff_file_contents` – core engine producing diffs per file without any
//   global "## File Differences" header.
// * `render_per_file_diffs` – helper to render the per-file diffs into
//   markdown (still omits a global header so the caller can choose).
//
// Backwards compatibility: the existing `generate_diff` function (full
// document diff) is retained for now. New code should prefer the
// per-file functions.

/// Determine the number of context lines shown around each change.
///
/// Precedence:
/// 1. `explicit`, when it is `Some(n)` with `n > 0`;
/// 2. the `CB_DIFF_CONTEXT_LINES` environment variable, when it parses to a
///    positive integer (surrounding whitespace is ignored);
/// 3. the built-in default of 3.
///
/// A value of zero from either source is treated as "not set" and falls
/// through to the next source.
fn resolve_context_lines(explicit: Option<usize>) -> usize {
    const ENV_VAR: &str = "CB_DIFF_CONTEXT_LINES";
    const DEFAULT_CONTEXT_LINES: usize = 3;

    explicit
        .filter(|&lines| lines > 0)
        .or_else(|| {
            std::env::var(ENV_VAR)
                .ok()
                // Trim first so values such as " 5 " are honoured instead of
                // silently falling back to the default.
                .and_then(|raw| raw.trim().parse::<usize>().ok())
                .filter(|&lines| lines > 0)
        })
        .unwrap_or(DEFAULT_CONTEXT_LINES)
}
38
39/// Original API: produce a single markdown section headed by "## File Differences".
40/// (Kept unchanged for compatibility.)
41pub fn generate_diff(old_content: &str, new_content: &str) -> String {
42    let diff = TextDiff::from_lines(old_content, new_content);
43    if diff.ratio() == 1.0 {
44        return String::new();
45    }
46    let context_lines = resolve_context_lines(None);
47    let grouped = diff.grouped_ops(context_lines);
48    let mut out = String::new();
49    out.push_str("## File Differences\n\n");
50    out.push_str("```diff\n");
51    for (group_index, group) in grouped.iter().enumerate() {
52        if group_index > 0 {
53            out.push_str("  ...\n");
54        }
55        for op in group {
56            for change in diff.iter_changes(op) {
57                let tag = change.tag();
58                let mut line = change.to_string();
59                if line.ends_with('\n') {
60                    line.pop();
61                    if line.ends_with('\r') {
62                        line.pop();
63                    }
64                }
65
66                match tag {
67                    ChangeTag::Delete => {
68                        out.push_str("- ");
69                        out.push_str(&line);
70                        out.push('\n');
71                    }
72                    ChangeTag::Insert => {
73                        out.push_str("+ ");
74                        out.push_str(&line);
75                        out.push('\n');
76                    }
77                    ChangeTag::Equal => {
78                        out.push_str("  ");
79                        out.push_str(&line);
80                        out.push('\n');
81                    }
82                }
83            }
84        }
85    }
86    out.push_str("```\n\n");
87    out
88}
89
/// Classification of how a file changed between two snapshots.
///
/// The enum is a plain tag, so it is `Copy` and `Hash` in addition to being
/// comparable; this lets callers pass it by value and use it as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PerFileStatus {
    /// The path exists only in the current snapshot.
    Added,
    /// The path exists only in the previous snapshot.
    Removed,
    /// The path exists in both snapshots with different content.
    Modified,
    /// The path exists in both snapshots with identical content.
    Unchanged,
}
98
99/// Structured diff result for a single file.
100#[derive(Debug, Clone)]
101pub struct PerFileDiff {
102    pub path: String,
103    pub status: PerFileStatus,
104    /// Unified diff fenced in ```diff (omitted when status == Unchanged and skip_unchanged=true)
105    pub diff: String,
106}
107
108impl PerFileDiff {
109    pub fn is_changed(&self) -> bool {
110        self.status != PerFileStatus::Unchanged
111    }
112}
113
114/// Produce a unified style diff for two text blobs WITHOUT adding any global
115/// section header. Returns empty string if contents are identical.
116fn unified_no_header(old: &str, new: &str, context_lines: usize) -> String {
117    let diff = TextDiff::from_lines(old, new);
118    if diff.ratio() == 1.0 {
119        return String::new();
120    }
121    let grouped = diff.grouped_ops(context_lines);
122    let mut out = String::new();
123    out.push_str("```diff\n");
124    for (group_index, group) in grouped.iter().enumerate() {
125        if group_index > 0 {
126            out.push_str("  ...\n");
127        }
128        for op in group {
129            for change in diff.iter_changes(op) {
130                let tag = change.tag();
131                let mut line = change.to_string();
132                if line.ends_with('\n') {
133                    line.pop();
134                    if line.ends_with('\r') {
135                        line.pop();
136                    }
137                }
138
139                match tag {
140                    ChangeTag::Delete => {
141                        out.push_str("- ");
142                        out.push_str(&line);
143                        out.push('\n');
144                    }
145                    ChangeTag::Insert => {
146                        out.push_str("+ ");
147                        out.push_str(&line);
148                        out.push('\n');
149                    }
150                    ChangeTag::Equal => {
151                        out.push_str("  ");
152                        out.push_str(&line);
153                        out.push('\n');
154                    }
155                }
156            }
157        }
158    }
159    out.push_str("```\n");
160    out
161}
162
163/// Diff per file content sets.
164///
165/// Inputs are maps keyed by file path (relative or absolute – caller decides)
166/// with values being the raw file content EXACTLY as you wish it to be diffed
167/// (e.g. already stripped of volatile metadata, no size/modified lines, only
168/// the real file body). This keeps higher level logic (parsing the markdown
169/// document) out of the diff layer.
170///
171/// Returns a vector of `PerFileDiff` for every file that is Added, Removed,
172/// or Modified. Unchanged files are omitted by default (`skip_unchanged=true`)
173/// to reduce noise, but you can opt to include them.
174pub fn diff_file_contents(
175    previous: &HashMap<String, String>,
176    current: &HashMap<String, String>,
177    skip_unchanged: bool,
178    explicit_context: Option<usize>,
179) -> Vec<PerFileDiff> {
180    let mut all_paths: Vec<String> = previous.keys().chain(current.keys()).cloned().collect();
181    all_paths.sort();
182    all_paths.dedup();
183
184    let context_lines = resolve_context_lines(explicit_context);
185    let mut results = Vec::new();
186
187    for path in all_paths {
188        let old_opt = previous.get(&path);
189        let new_opt = current.get(&path);
190        match (old_opt, new_opt) {
191            (None, Some(new_content)) => {
192                // Added file: present only in current snapshot
193                let mut diff = String::new();
194                diff.push_str("```diff\n");
195                for line in new_content.lines() {
196                    diff.push_str("+ ");
197                    diff.push_str(line);
198                    diff.push('\n');
199                }
200                diff.push_str("```\n");
201                results.push(PerFileDiff {
202                    path,
203                    status: PerFileStatus::Added,
204                    diff,
205                });
206            }
207            (Some(_old_content), None) => {
208                // Removed file
209                let old_content = previous.get(&path).unwrap();
210                let mut diff = String::new();
211                diff.push_str("```diff\n");
212                for line in old_content.lines() {
213                    diff.push_str("- ");
214                    diff.push_str(line);
215                    diff.push('\n');
216                }
217                diff.push_str("```\n");
218                results.push(PerFileDiff {
219                    path,
220                    status: PerFileStatus::Removed,
221                    diff,
222                });
223            }
224            (Some(old_content), Some(new_content)) => {
225                if old_content == new_content {
226                    if !skip_unchanged {
227                        results.push(PerFileDiff {
228                            path,
229                            status: PerFileStatus::Unchanged,
230                            diff: String::new(),
231                        });
232                    }
233                } else {
234                    let diff = unified_no_header(old_content, new_content, context_lines);
235                    results.push(PerFileDiff {
236                        path,
237                        status: PerFileStatus::Modified,
238                        diff,
239                    });
240                }
241            }
242            (None, None) => unreachable!(),
243        }
244    }
245
246    results
247}
248
249/// Render a collection of per file diffs into markdown WITHOUT a global
250/// "## File Differences" header. Each file begins with a "### Diff: `<path>`"
251/// heading so that it can be appended near the changed files summary.
252pub fn render_per_file_diffs(diffs: &[PerFileDiff]) -> String {
253    let mut out = String::new();
254    for d in diffs {
255        out.push_str(&format!("### Diff: `{}`\n\n", d.path));
256        match d.status {
257            PerFileStatus::Added => out.push_str("_Status: Added_\n\n"),
258            PerFileStatus::Removed => out.push_str("_Status: Removed_\n\n"),
259            PerFileStatus::Modified => out.push_str("_Status: Modified_\n\n"),
260            PerFileStatus::Unchanged => {
261                out.push_str("_Status: Unchanged_\n\n");
262            }
263        }
264        if !d.diff.is_empty() {
265            out.push_str(&d.diff);
266            if !d.diff.ends_with('\n') {
267                out.push('\n');
268            }
269        }
270        out.push('\n');
271    }
272    out
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned path -> content map from borrowed string pairs.
    fn map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect()
    }

    #[test]
    fn unchanged_is_skipped() {
        let prev = map(&[("a.txt", "one\n")]);
        let curr = map(&[("a.txt", "one\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(2));
        assert!(diffs.is_empty());
    }

    #[test]
    fn added_file_diff() {
        let prev = map(&[]);
        let curr = map(&[("new.rs", "fn main() {}\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(2));
        assert_eq!(diffs.len(), 1);
        let d = &diffs[0];
        assert_eq!(d.status, PerFileStatus::Added);
        assert!(d.diff.contains("+ fn main() {}"));
    }

    #[test]
    fn removed_file_diff() {
        let prev = map(&[("old.rs", "fn old() {}\n")]);
        let curr = map(&[]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        let d = &diffs[0];
        assert_eq!(d.status, PerFileStatus::Removed);
        assert!(d.diff.contains("- fn old() {}"));
    }

    #[test]
    fn modified_file_diff() {
        let prev = map(&[("lib.rs", "fn add(a:i32,b:i32)->i32{a+b}\n")]);
        let curr = map(&[("lib.rs", "fn add(a: i32, b: i32) -> i32 { a + b }\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(1));
        assert_eq!(diffs.len(), 1);
        let d = &diffs[0];
        assert_eq!(d.status, PerFileStatus::Modified);
        assert!(d.diff.contains("- fn add(a:i32,b:i32)->i32{a+b}"));
        assert!(d.diff.contains("+ fn add(a: i32, b: i32) -> i32 { a + b }"));
    }

    #[test]
    fn include_unchanged_when_requested() {
        let prev = map(&[("a.txt", "same\n")]);
        let curr = map(&[("a.txt", "same\n")]);
        let diffs = diff_file_contents(&prev, &curr, false, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Unchanged);
    }

    #[test]
    fn render_output_basic() {
        let prev = map(&[("a.txt", "one\n"), ("b.txt", "line1\nline2\n")]);
        let curr = map(&[
            ("a.txt", "two\n"),
            ("b.txt", "line1\nline2\n"),
            ("c.txt", "new file\n"),
        ]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(1));
        let out = render_per_file_diffs(&diffs);
        assert!(out.contains("### Diff: `a.txt`"));
        assert!(out.contains("_Status: Modified_"));
        assert!(out.contains("+ two"));
        assert!(out.contains("### Diff: `c.txt`"));
        assert!(out.contains("_Status: Added_"));
        assert!(out.contains("+ new file"));
    }

    #[test]
    fn test_empty_files() {
        let prev = map(&[("empty.txt", "")]);
        let curr = map(&[("empty.txt", "")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert!(diffs.is_empty());
    }

    #[test]
    fn test_empty_to_content() {
        let prev = map(&[("file.txt", "")]);
        let curr = map(&[("file.txt", "new content\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("+ new content"));
    }

    #[test]
    fn test_content_to_empty() {
        let prev = map(&[("file.txt", "old content\n")]);
        let curr = map(&[("file.txt", "")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("- old content"));
    }

    #[test]
    fn test_multiline_modifications() {
        let prev = map(&[("file.txt", "line1\nline2\nline3\nline4\n")]);
        let curr = map(&[("file.txt", "line1\nmodified2\nline3\nline4\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, Some(2));
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("- line2"));
        assert!(diffs[0].diff.contains("+ modified2"));
    }

    #[test]
    fn test_windows_line_endings() {
        let prev = map(&[("file.txt", "line1\r\nline2\r\n")]);
        let curr = map(&[("file.txt", "line1\r\nmodified2\r\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("- line2"));
        assert!(diffs[0].diff.contains("+ modified2"));
    }

    #[test]
    fn test_per_file_diff_is_changed() {
        let with_status = |status: PerFileStatus, diff: &str| PerFileDiff {
            path: "test.txt".to_string(),
            status,
            diff: diff.to_string(),
        };

        assert!(with_status(PerFileStatus::Added, "test").is_changed());
        assert!(with_status(PerFileStatus::Removed, "test").is_changed());
        assert!(with_status(PerFileStatus::Modified, "test").is_changed());
        assert!(!with_status(PerFileStatus::Unchanged, "").is_changed());
    }

    #[test]
    fn test_generate_diff_identical_content() {
        let content = "line1\nline2\nline3\n";
        let diff = generate_diff(content, content);
        assert!(diff.is_empty());
    }

    #[test]
    fn test_generate_diff_with_changes() {
        let old = "line1\nline2\nline3\n";
        let new = "line1\nmodified2\nline3\n";
        let diff = generate_diff(old, new);
        assert!(diff.contains("## File Differences"));
        assert!(diff.contains("```diff"));
        assert!(diff.contains("- line2"));
        assert!(diff.contains("+ modified2"));
    }

    // NOTE: the two tests below expect the built-in default of 3, which
    // assumes `CB_DIFF_CONTEXT_LINES` is not set to another positive value
    // in the environment running the test suite.
    #[test]
    fn test_resolve_context_lines_default() {
        let context = resolve_context_lines(None);
        assert_eq!(context, 3);
    }

    #[test]
    fn test_resolve_context_lines_explicit() {
        let context = resolve_context_lines(Some(5));
        assert_eq!(context, 5);
    }

    #[test]
    fn test_resolve_context_lines_zero_fallback() {
        let context = resolve_context_lines(Some(0));
        assert_eq!(context, 3); // Should fallback to default
    }

    #[test]
    fn test_unicode_content_diff() {
        let prev = map(&[("unicode.txt", "Hello 世界\n")]);
        let curr = map(&[("unicode.txt", "Hello 世界! 🌍\n")]);
        let diffs = diff_file_contents(&prev, &curr, true, None);
        assert_eq!(diffs.len(), 1);
        assert_eq!(diffs[0].status, PerFileStatus::Modified);
        assert!(diffs[0].diff.contains("Hello 世界"));
        assert!(diffs[0].diff.contains("🌍"));
    }

    #[test]
    fn test_render_per_file_diffs_empty() {
        let diffs = vec![];
        let output = render_per_file_diffs(&diffs);
        assert!(output.is_empty());
    }

    #[test]
    fn test_render_per_file_diffs_unchanged() {
        let diffs = vec![PerFileDiff {
            path: "unchanged.txt".to_string(),
            status: PerFileStatus::Unchanged,
            diff: String::new(),
        }];
        let output = render_per_file_diffs(&diffs);
        assert!(output.contains("### Diff: `unchanged.txt`"));
        assert!(output.contains("_Status: Unchanged_"));
    }

    #[test]
    fn test_render_per_file_diffs_without_trailing_newline() {
        let diffs = vec![PerFileDiff {
            path: "test.txt".to_string(),
            status: PerFileStatus::Modified,
            diff: "```diff\n+ line\n```".to_string(), // No trailing newline
        }];
        let output = render_per_file_diffs(&diffs);
        assert!(output.contains("### Diff: `test.txt`"));
        assert!(output.contains("_Status: Modified_"));
        assert!(output.ends_with("\n\n")); // Should add newlines
    }

    #[test]
    fn test_generate_diff_with_multiple_groups() {
        // Two changes far apart. Whether they land in separate groups depends
        // on the context size, which `generate_diff` may take from the
        // environment, so only context-independent facts are asserted here;
        // the "..." separator itself is covered by
        // `test_unified_no_header_with_multiple_groups`.
        let old_content = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10";
        let new_content = "line1_modified\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9_modified\nline10";

        let diff = generate_diff(old_content, new_content);
        assert!(diff.contains("## File Differences"));
        assert!(diff.contains("```diff"));
        assert!(diff.contains("- line1\n"));
        assert!(diff.contains("+ line1_modified\n"));
        assert!(diff.contains("- line9\n"));
        assert!(diff.contains("+ line9_modified\n"));
    }

    #[test]
    fn test_diff_with_windows_line_endings() {
        let old_content = "line1\r\nline2\r\n";
        let new_content = "line1_modified\r\nline2\r\n";

        let diff = generate_diff(old_content, new_content);
        assert!(diff.contains("```diff"));
        assert!(diff.contains("line1_modified"));
        // Carriage returns must be stripped from rendered lines.
        assert!(!diff.contains('\r'));
    }

    #[test]
    fn test_unified_no_header_with_multiple_groups() {
        // The first and last lines change with 19 untouched lines in between;
        // with 2 context lines that gap is too large for a single group.
        let old_content = "start\n\n\n\n\n\n\n\n\n\nmiddle\n\n\n\n\n\n\n\n\n\nend";
        let new_content =
            "start_modified\n\n\n\n\n\n\n\n\n\nmiddle\n\n\n\n\n\n\n\n\n\nend_modified";

        let diff = unified_no_header(old_content, new_content, 2);
        assert!(diff.starts_with("```diff\n"));
        assert!(diff.contains("+ start_modified\n"));
        assert!(diff.contains("+ end_modified\n"));
        // Separate groups are divided by the "..." marker line.
        assert!(diff.contains("  ...\n"));
        // "middle" is further than 2 lines from either change, so it is elided.
        assert!(!diff.contains("middle"));
    }

    #[test]
    fn test_unified_no_header_with_windows_line_endings() {
        let old_content = "line1\r\nline2\r\n";
        let new_content = "line1_modified\r\nline2\r\n";

        let diff = unified_no_header(old_content, new_content, 3);
        assert!(diff.contains("```diff"));
        assert!(diff.contains("line1_modified"));
        // Carriage returns must be stripped from rendered lines.
        assert!(!diff.contains('\r'));
    }
}