Skip to main content

ai_agent/utils/
diff.rs

1// Source: /data/home/swei/claudecode/openclaudecode/src/utils/diff.ts
2//! Structured patch generation for file edits
3
4use similar::{ChangeTag, TextDiff};
5
/// A single hunk from a structured diff
///
/// A hunk is one contiguous run of diff lines together with the bookkeeping
/// needed to locate it in the old and new contents. The type can be
/// serialized and deserialized with `serde`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct StructuredPatchHunk {
    /// Where the hunk starts on the old side.
    /// NOTE(review): the base (0- or 1-based, relative or absolute) is decided
    /// by whichever code builds the hunk; confirm before rendering headers.
    pub old_start: usize,
    /// Number of old-side lines in the hunk (removed lines plus context lines).
    pub old_lines: usize,
    /// Where the hunk starts on the new side (same caveat as `old_start`).
    pub new_start: usize,
    /// Number of new-side lines in the hunk (added lines plus context lines).
    pub new_lines: usize,
    /// Lines prefixed with ' ' (context), '+' (added), or '-' (removed)
    pub lines: Vec<String>,
}
16
/// Maximum number of unchanged lines kept as context next to a change when
/// hunks are assembled by [`generate_patch`].
pub const CONTEXT_LINES: usize = 3;
19
20/// Generate a structured patch from old and new file contents.
21/// Uses the `similar` crate for line-level diffing.
22pub fn generate_patch(old_content: &str, new_content: &str) -> Vec<StructuredPatchHunk> {
23    let old_escaped = escape_for_diff(old_content);
24    let new_escaped = escape_for_diff(new_content);
25
26    if old_escaped.lines().count() == 0 && new_escaped.lines().count() == 0 {
27        return Vec::new();
28    }
29
30    let mut changes: Vec<(ChangeTag, &str)> = Vec::new();
31    for change in TextDiff::from_lines(&old_escaped, &new_escaped).iter_all_changes() {
32        changes.push((change.tag(), change.value()));
33    }
34
35    if changes.is_empty() {
36        return Vec::new();
37    }
38
39    // Group consecutive changes into hunks with context
40    let mut hunks: Vec<StructuredPatchHunk> = Vec::new();
41    let mut current: Vec<(ChangeTag, Vec<String>)> = Vec::new();
42    let mut trailing_context: Vec<(ChangeTag, Vec<String>)> = Vec::new();
43
44    for (i, (tag, value)) in changes.iter().enumerate() {
45        let lines: Vec<String> = value.lines().map(|l| unescape_from_diff(l)).collect();
46
47        match tag {
48            ChangeTag::Delete | ChangeTag::Insert => {
49                // Flush trailing context as new group
50                if !trailing_context.is_empty() {
51                    current = trailing_context.clone();
52                    trailing_context.clear();
53                }
54                let prefix = match tag {
55                    ChangeTag::Delete => '-',
56                    ChangeTag::Insert => '+',
57                    _ => ' ',
58                };
59                for line in lines {
60                    current.push((*tag, vec![format!("{}{}", prefix, line)]));
61                }
62            }
63            ChangeTag::Equal => {
64                if !current.is_empty() && trailing_context.len() < CONTEXT_LINES {
65                    trailing_context.push((*tag, lines));
66                } else if !current.is_empty() {
67                    // Context is too far from changes — flush current hunk
68                    hunks.push(build_hunk(&current));
69                    current = trailing_context.clone();
70                    trailing_context.clear();
71                }
72                // If current is empty, just skip standalone context
73            }
74        }
75    }
76
77    if !current.is_empty() {
78        // Use the index tracking to get line numbers
79        // Simplified: compute positions from the changes
80        let mut pos_old = 0usize;
81        let mut pos_new = 0usize;
82        let mut first_hunk_old = None;
83        let mut first_hunk_new = None;
84        let mut total_old = 0usize;
85        let mut total_new = 0usize;
86
87        for (tag, prefixed_lines) in &current {
88            let line_count = prefixed_lines.len();
89            match tag {
90                ChangeTag::Delete => {
91                    if first_hunk_old.is_none() {
92                        first_hunk_old = Some(pos_old);
93                    }
94                    total_old += line_count;
95                    pos_old += line_count;
96                }
97                ChangeTag::Insert => {
98                    if first_hunk_new.is_none() {
99                        first_hunk_new = Some(pos_new);
100                    }
101                    total_new += line_count;
102                    pos_new += line_count;
103                }
104                ChangeTag::Equal => {
105                    if first_hunk_old.is_none() {
106                        first_hunk_old = Some(pos_old);
107                    }
108                    if first_hunk_new.is_none() {
109                        first_hunk_new = Some(pos_new);
110                    }
111                    total_old += line_count;
112                    total_new += line_count;
113                    pos_old += line_count;
114                    pos_new += line_count;
115                }
116            }
117        }
118
119        let all_lines: Vec<String> = current
120            .iter()
121            .flat_map(|(_, lines)| lines.clone())
122            .collect();
123        hunks.push(StructuredPatchHunk {
124            old_start: first_hunk_old.unwrap_or(0),
125            old_lines: total_old,
126            new_start: first_hunk_new.unwrap_or(0),
127            new_lines: total_new,
128            lines: all_lines,
129        });
130    }
131
132    hunks
133}
134
135/// Count lines added and removed from a structured patch.
136pub fn count_lines_changed(
137    patch: &[StructuredPatchHunk],
138    new_file_content: Option<&str>,
139) -> (usize, usize) {
140    if patch.is_empty() {
141        if let Some(content) = new_file_content {
142            let additions = content.lines().count();
143            return (additions, 0);
144        }
145        return (0, 0);
146    }
147
148    let additions = patch
149        .iter()
150        .flat_map(|h| &h.lines)
151        .filter(|l| l.starts_with('+'))
152        .count();
153
154    let removals = patch
155        .iter()
156        .flat_map(|h| &h.lines)
157        .filter(|l| l.starts_with('-'))
158        .count();
159
160    (additions, removals)
161}
162
// `&` and `$` confuse the diff library, so they are swapped for placeholder
// tokens before diffing and swapped back afterwards.
const AMPERSAND_TOKEN: &str = "<<:AMPERSAND_TOKEN:>>";
const DOLLAR_TOKEN: &str = "<<:DOLLAR_TOKEN:>>";

/// Replace every `&` and `$` in `s` with its placeholder token.
fn escape_for_diff(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str(AMPERSAND_TOKEN),
            '$' => escaped.push_str(DOLLAR_TOKEN),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Turn the placeholder tokens in `s` back into `&` and `$`.
fn unescape_from_diff(s: &str) -> String {
    let ampersands_restored = s.replace(AMPERSAND_TOKEN, "&");
    ampersands_restored.replace(DOLLAR_TOKEN, "$")
}
174
175/// Build a hunk from grouped changes, computing line positions.
176fn build_hunk(current: &[(ChangeTag, Vec<String>)]) -> StructuredPatchHunk {
177    let mut pos_old = 0usize;
178    let mut pos_new = 0usize;
179    let mut first_hunk_old = None;
180    let mut first_hunk_new = None;
181    let mut total_old = 0usize;
182    let mut total_new = 0usize;
183
184    for (tag, prefixed_lines) in current {
185        let line_count = prefixed_lines.len();
186        match tag {
187            ChangeTag::Delete => {
188                if first_hunk_old.is_none() {
189                    first_hunk_old = Some(pos_old);
190                }
191                total_old += line_count;
192                pos_old += line_count;
193            }
194            ChangeTag::Insert => {
195                if first_hunk_new.is_none() {
196                    first_hunk_new = Some(pos_new);
197                }
198                total_new += line_count;
199                pos_new += line_count;
200            }
201            ChangeTag::Equal => {
202                if first_hunk_old.is_none() {
203                    first_hunk_old = Some(pos_old);
204                }
205                if first_hunk_new.is_none() {
206                    first_hunk_new = Some(pos_new);
207                }
208                total_old += line_count;
209                total_new += line_count;
210                pos_old += line_count;
211                pos_new += line_count;
212            }
213        }
214    }
215
216    let all_lines: Vec<String> = current
217        .iter()
218        .flat_map(|(_, lines)| lines.clone())
219        .collect();
220    StructuredPatchHunk {
221        old_start: first_hunk_old.unwrap_or(0),
222        old_lines: total_old,
223        new_start: first_hunk_new.unwrap_or(0),
224        new_lines: total_new,
225        lines: all_lines,
226    }
227}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// True when any hunk contains exactly the given prefixed line.
    fn patch_contains(hunks: &[StructuredPatchHunk], expected: &str) -> bool {
        hunks
            .iter()
            .flat_map(|hunk| hunk.lines.iter())
            .any(|line| line == expected)
    }

    #[test]
    fn test_generate_patch_simple_edit() {
        let old = "Hello\nWorld\nFoo";
        let new = "Hello\nRust\nFoo";
        let hunks = generate_patch(old, new);
        assert!(!hunks.is_empty());

        // The replaced line must show up as one removal and one addition.
        assert!(patch_contains(&hunks, "-World"));
        assert!(patch_contains(&hunks, "+Rust"));
        assert_eq!(count_lines_changed(&hunks, None), (1, 1));
    }

    #[test]
    fn test_generate_patch_no_changes() {
        let content = "Hello\nWorld";
        let hunks = generate_patch(content, content);
        assert!(hunks.is_empty());
    }

    #[test]
    fn test_generate_patch_empty_inputs() {
        assert!(generate_patch("", "").is_empty());
    }

    #[test]
    fn test_generate_patch_new_file() {
        let old = "";
        let new = "Hello\nWorld";
        let hunks = generate_patch(old, new);
        assert!(!hunks.is_empty());
        assert!(patch_contains(&hunks, "+Hello"));
        assert!(patch_contains(&hunks, "+World"));
        assert_eq!(count_lines_changed(&hunks, None), (2, 0));
    }

    #[test]
    fn test_generate_patch_deletion() {
        let old = "Hello\nWorld\nFoo";
        let new = "Hello\nFoo";
        let hunks = generate_patch(old, new);
        assert!(!hunks.is_empty());
        assert!(patch_contains(&hunks, "-World"));
        assert_eq!(count_lines_changed(&hunks, None), (0, 1));
    }

    #[test]
    fn test_generate_patch_restores_escaped_characters() {
        // `&` and `$` are tokenized before diffing; the tokens must never leak
        // into the emitted lines.
        let hunks = generate_patch("price: $5", "price: $6 & tax");
        assert!(patch_contains(&hunks, "-price: $5"));
        assert!(patch_contains(&hunks, "+price: $6 & tax"));
    }

    #[test]
    fn test_count_lines_changed() {
        let old = "a\nb\nc";
        let new = "a\nX\nc";
        let hunks = generate_patch(old, new);
        let (additions, removals) = count_lines_changed(&hunks, None);
        assert_eq!(additions, 1);
        assert_eq!(removals, 1);
    }

    #[test]
    fn test_count_lines_changed_empty() {
        let (additions, removals) = count_lines_changed(&[], Some("hello\nworld"));
        assert_eq!(additions, 2);
        assert_eq!(removals, 0);

        // Without hunks and without new content there is nothing to count.
        assert_eq!(count_lines_changed(&[], None), (0, 0));
    }

    #[test]
    fn test_escape_ampersand() {
        assert_eq!(escape_for_diff("a & b"), "a <<:AMPERSAND_TOKEN:>> b");
        assert_eq!(escape_for_diff("x$y"), "x<<:DOLLAR_TOKEN:>>y");
    }

    #[test]
    fn test_unescape() {
        assert_eq!(unescape_from_diff("a <<:AMPERSAND_TOKEN:>> b"), "a & b");
        assert_eq!(unescape_from_diff("x<<:DOLLAR_TOKEN:>>y"), "x$y");
    }

    #[test]
    fn test_escape_round_trip() {
        let sample = "cost: $5 & up\n&&$$";
        assert_eq!(unescape_from_diff(&escape_for_diff(sample)), sample);
    }
}