context_builder/
diff.rs

1use similar::{ChangeTag, TextDiff};
2use std::collections::HashMap;
3
/// Line-based diff utilities.
///
/// This module previously exposed `generate_diff`, which produced a single
/// "## File Differences" section for an entire markdown document. That
/// approach made it easy for volatile sections (timestamps, file tree
/// structure, etc.) to create noisy diffs. To address this, the new
/// per-file API lets the caller diff only the normalized *file content*
/// blocks that appear under each `` ### File: `path` `` heading in the
/// canonical output, completely ignoring the global header and the file
/// tree portion. Each file receives an isolated unified-style diff.
///
/// High-level additions:
/// * `PerFileStatus` – classification of the change.
/// * `PerFileDiff` – structured diff result for a single file.
/// * `diff_file_contents` – core engine producing diffs per file without any
///   global "## File Differences" header.
/// * `render_per_file_diffs` – helper to render the per-file diffs into
///   markdown (still omits a global header so the caller can choose).
///
/// Backwards compatibility: the existing `generate_diff` function (full
/// document diff) is retained for now. New code should prefer the
/// per-file functions.
///
/// Resolve how many context lines surround each change in a diff.
///
/// Sources are consulted in this order; the first usable one wins:
/// 1. `explicit`, when it is `Some(n)` with `n > 0`;
/// 2. the `CB_DIFF_CONTEXT_LINES` environment variable, when it parses as a
///    positive integer (surrounding whitespace is ignored);
/// 3. the built-in default of 3.
///
/// A value of zero from either source is treated as "not set" and falls
/// through to the next source.
fn resolve_context_lines(explicit: Option<usize>) -> usize {
    const ENV_VAR: &str = "CB_DIFF_CONTEXT_LINES";
    const DEFAULT_CONTEXT_LINES: usize = 3;

    explicit
        .filter(|&n| n > 0)
        .or_else(|| {
            std::env::var(ENV_VAR)
                .ok()
                // Trim first: `usize::from_str` rejects padding such as a
                // trailing newline, which would silently discard the setting.
                .and_then(|raw| raw.trim().parse::<usize>().ok())
                .filter(|&n| n > 0)
        })
        .unwrap_or(DEFAULT_CONTEXT_LINES)
}
38
39/// Original API: produce a single markdown section headed by "## File Differences".
40/// (Kept unchanged for compatibility.)
41pub fn generate_diff(old_content: &str, new_content: &str) -> String {
42    let diff = TextDiff::from_lines(old_content, new_content);
43    if diff.ratio() == 1.0 {
44        return String::new();
45    }
46    let context_lines = resolve_context_lines(None);
47    let grouped = diff.grouped_ops(context_lines);
48    let mut out = String::new();
49    out.push_str("## File Differences\n\n");
50    out.push_str("```diff\n");
51    for (group_index, group) in grouped.iter().enumerate() {
52        if group_index > 0 {
53            out.push_str("  ...\n");
54        }
55        for op in group {
56            for change in diff.iter_changes(op) {
57                let tag = change.tag();
58                let mut line = change.to_string();
59                if line.ends_with('\n') {
60                    line.pop();
61                    if line.ends_with('\r') {
62                        line.pop();
63                    }
64                }
65                if line.trim_start().starts_with("```") {
66                    continue;
67                }
68                match tag {
69                    ChangeTag::Delete => {
70                        out.push_str("- ");
71                        out.push_str(&line);
72                        out.push('\n');
73                    }
74                    ChangeTag::Insert => {
75                        out.push_str("+ ");
76                        out.push_str(&line);
77                        out.push('\n');
78                    }
79                    ChangeTag::Equal => {
80                        out.push_str("  ");
81                        out.push_str(&line);
82                        out.push('\n');
83                    }
84                }
85            }
86        }
87    }
88    out.push_str("```\n\n");
89    out
90}
91
/// Classification of how a file changed between two snapshots.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PerFileStatus {
    /// The path exists only in the current snapshot.
    Added,
    /// The path exists only in the previous snapshot.
    Removed,
    /// The path exists in both snapshots with differing content.
    Modified,
    /// The path exists in both snapshots with identical content.
    Unchanged,
}

/// Structured diff result for a single file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerFileDiff {
    /// Key under which the file appeared in the snapshot maps.
    pub path: String,
    /// How the file changed between the two snapshots.
    pub status: PerFileStatus,
    /// Rendered changes wrapped in a markdown `diff` code fence.
    ///
    /// `diff_file_contents` leaves this empty for `Unchanged` entries, which
    /// are only reported at all when it is called with `skip_unchanged = false`.
    pub diff: String,
}

impl PerFileDiff {
    /// Returns `true` for every status except [`PerFileStatus::Unchanged`].
    pub fn is_changed(&self) -> bool {
        self.status != PerFileStatus::Unchanged
    }
}
115
116/// Produce a unified style diff for two text blobs WITHOUT adding any global
117/// section header. Returns empty string if contents are identical.
118fn unified_no_header(old: &str, new: &str, context_lines: usize) -> String {
119    let diff = TextDiff::from_lines(old, new);
120    if diff.ratio() == 1.0 {
121        return String::new();
122    }
123    let grouped = diff.grouped_ops(context_lines);
124    let mut out = String::new();
125    out.push_str("```diff\n");
126    for (group_index, group) in grouped.iter().enumerate() {
127        if group_index > 0 {
128            out.push_str("  ...\n");
129        }
130        for op in group {
131            for change in diff.iter_changes(op) {
132                let tag = change.tag();
133                let mut line = change.to_string();
134                if line.ends_with('\n') {
135                    line.pop();
136                    if line.ends_with('\r') {
137                        line.pop();
138                    }
139                }
140                if line.trim_start().starts_with("```") {
141                    continue;
142                }
143                match tag {
144                    ChangeTag::Delete => {
145                        out.push_str("- ");
146                        out.push_str(&line);
147                        out.push('\n');
148                    }
149                    ChangeTag::Insert => {
150                        out.push_str("+ ");
151                        out.push_str(&line);
152                        out.push('\n');
153                    }
154                    ChangeTag::Equal => {
155                        out.push_str("  ");
156                        out.push_str(&line);
157                        out.push('\n');
158                    }
159                }
160            }
161        }
162    }
163    out.push_str("```\n");
164    out
165}
166
167/// Diff per file content sets.
168///
169/// Inputs are maps keyed by file path (relative or absolute – caller decides)
170/// with values being the raw file content EXACTLY as you wish it to be diffed
171/// (e.g. already stripped of volatile metadata, no size/modified lines, only
172/// the real file body). This keeps higher level logic (parsing the markdown
173/// document) out of the diff layer.
174///
175/// Returns a vector of `PerFileDiff` for every file that is Added, Removed,
176/// or Modified. Unchanged files are omitted by default (`skip_unchanged=true`)
177/// to reduce noise, but you can opt to include them.
178pub fn diff_file_contents(
179    previous: &HashMap<String, String>,
180    current: &HashMap<String, String>,
181    skip_unchanged: bool,
182    explicit_context: Option<usize>,
183) -> Vec<PerFileDiff> {
184    let mut all_paths: Vec<String> = previous.keys().chain(current.keys()).cloned().collect();
185    all_paths.sort();
186    all_paths.dedup();
187
188    let context_lines = resolve_context_lines(explicit_context);
189    let mut results = Vec::new();
190
191    for path in all_paths {
192        let old_opt = previous.get(&path);
193        let new_opt = current.get(&path);
194        match (old_opt, new_opt) {
195            (None, Some(new_content)) => {
196                // Added file: present only in current snapshot
197                let mut diff = String::new();
198                diff.push_str("```diff\n");
199                for line in new_content.lines() {
200                    if line.trim_start().starts_with("```") {
201                        continue;
202                    }
203                    diff.push_str("+ ");
204                    diff.push_str(line);
205                    diff.push('\n');
206                }
207                diff.push_str("```\n");
208                results.push(PerFileDiff {
209                    path,
210                    status: PerFileStatus::Added,
211                    diff,
212                });
213            }
214            (Some(_old_content), None) => {
215                // Removed file
216                let old_content = previous.get(&path).unwrap();
217                let mut diff = String::new();
218                diff.push_str("```diff\n");
219                for line in old_content.lines() {
220                    if line.trim_start().starts_with("```") {
221                        continue;
222                    }
223                    diff.push_str("- ");
224                    diff.push_str(line);
225                    diff.push('\n');
226                }
227                diff.push_str("```\n");
228                results.push(PerFileDiff {
229                    path,
230                    status: PerFileStatus::Removed,
231                    diff,
232                });
233            }
234            (Some(old_content), Some(new_content)) => {
235                if old_content == new_content {
236                    if !skip_unchanged {
237                        results.push(PerFileDiff {
238                            path,
239                            status: PerFileStatus::Unchanged,
240                            diff: String::new(),
241                        });
242                    }
243                } else {
244                    let diff = unified_no_header(old_content, new_content, context_lines);
245                    results.push(PerFileDiff {
246                        path,
247                        status: PerFileStatus::Modified,
248                        diff,
249                    });
250                }
251            }
252            (None, None) => unreachable!(),
253        }
254    }
255
256    results
257}
258
259/// Render a collection of per file diffs into markdown WITHOUT a global
260/// "## File Differences" header. Each file begins with a "### Diff: `<path>`"
261/// heading so that it can be appended near the changed files summary.
262pub fn render_per_file_diffs(diffs: &[PerFileDiff]) -> String {
263    let mut out = String::new();
264    for d in diffs {
265        out.push_str(&format!("### Diff: `{}`\n\n", d.path));
266        match d.status {
267            PerFileStatus::Added => out.push_str("_Status: Added_\n\n"),
268            PerFileStatus::Removed => out.push_str("_Status: Removed_\n\n"),
269            PerFileStatus::Modified => out.push_str("_Status: Modified_\n\n"),
270            PerFileStatus::Unchanged => {
271                out.push_str("_Status: Unchanged_\n\n");
272            }
273        }
274        if !d.diff.is_empty() {
275            out.push_str(&d.diff);
276            if !d.diff.ends_with('\n') {
277                out.push('\n');
278            }
279        }
280        out.push('\n');
281    }
282    out
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned path -> content snapshot from borrowed pairs.
    fn snapshot(entries: &[(&str, &str)]) -> HashMap<String, String> {
        let mut files = HashMap::new();
        for &(path, body) in entries {
            files.insert(path.to_string(), body.to_string());
        }
        files
    }

    /// Asserts that exactly one diff was produced and returns it.
    fn sole(diffs: &[PerFileDiff]) -> &PerFileDiff {
        assert_eq!(diffs.len(), 1);
        &diffs[0]
    }

    #[test]
    fn unchanged_is_skipped() {
        let before = snapshot(&[("a.txt", "one\n")]);
        let after = snapshot(&[("a.txt", "one\n")]);
        assert!(diff_file_contents(&before, &after, true, Some(2)).is_empty());
    }

    #[test]
    fn added_file_diff() {
        let before = snapshot(&[]);
        let after = snapshot(&[("new.rs", "fn main() {}\n")]);
        let diffs = diff_file_contents(&before, &after, true, Some(2));
        let added = sole(&diffs);
        assert_eq!(added.status, PerFileStatus::Added);
        assert!(added.diff.contains("+ fn main() {}"));
    }

    #[test]
    fn removed_file_diff() {
        let before = snapshot(&[("old.rs", "fn old() {}\n")]);
        let after = snapshot(&[]);
        let diffs = diff_file_contents(&before, &after, true, None);
        let removed = sole(&diffs);
        assert_eq!(removed.status, PerFileStatus::Removed);
        assert!(removed.diff.contains("- fn old() {}"));
    }

    #[test]
    fn modified_file_diff() {
        let before = snapshot(&[("lib.rs", "fn add(a:i32,b:i32)->i32{a+b}\n")]);
        let after = snapshot(&[("lib.rs", "fn add(a: i32, b: i32) -> i32 { a + b }\n")]);
        let diffs = diff_file_contents(&before, &after, true, Some(1));
        let modified = sole(&diffs);
        assert_eq!(modified.status, PerFileStatus::Modified);
        assert!(modified.diff.contains("- fn add(a:i32,b:i32)->i32{a+b}"));
        assert!(modified
            .diff
            .contains("+ fn add(a: i32, b: i32) -> i32 { a + b }"));
    }

    #[test]
    fn include_unchanged_when_requested() {
        let before = snapshot(&[("a.txt", "same\n")]);
        let after = snapshot(&[("a.txt", "same\n")]);
        let diffs = diff_file_contents(&before, &after, false, None);
        assert_eq!(sole(&diffs).status, PerFileStatus::Unchanged);
    }

    #[test]
    fn render_output_basic() {
        let before = snapshot(&[("a.txt", "one\n"), ("b.txt", "line1\nline2\n")]);
        let after = snapshot(&[
            ("a.txt", "two\n"),
            ("b.txt", "line1\nline2\n"),
            ("c.txt", "new file\n"),
        ]);
        let markdown = render_per_file_diffs(&diff_file_contents(&before, &after, true, Some(1)));
        let expected_fragments = [
            "### Diff: `a.txt`",
            "_Status: Modified_",
            "+ two",
            "### Diff: `c.txt`",
            "_Status: Added_",
            "+ new file",
        ];
        for fragment in expected_fragments.iter() {
            assert!(markdown.contains(fragment));
        }
    }
}