Skip to main content

vtcode_commons/
diff_paths.rs

1use std::path::Path;
2
/// Parse a `diff --git a/<old> b/<new>` header line and return the new path.
///
/// The line is tokenized on whitespace: the first two tokens must be `diff`
/// and `--git`, the third token (old path) is ignored, and the fourth token is
/// taken as the new path. A single leading `b/` prefix is removed from it, so
/// a file that really lives under a directory named `b` (for example
/// `b/b/mod.rs`) keeps that directory in the result.
///
/// Returns `None` when the line is not a `diff --git` header or has fewer than
/// four tokens. Paths containing whitespace are not supported because of the
/// whitespace tokenization.
pub fn parse_diff_git_path(line: &str) -> Option<String> {
    let mut tokens = line.split_whitespace();
    if tokens.next()? != "diff" || tokens.next()? != "--git" {
        return None;
    }
    let _old_path = tokens.next()?;
    let new_path = tokens.next()?;
    // `strip_prefix` removes the prefix at most once; `trim_start_matches`
    // would keep eating repeated `b/` segments and corrupt `b/b/...` paths.
    Some(new_path.strip_prefix("b/").unwrap_or(new_path).to_string())
}
13
14/// Parse unified diff marker line (`---`/`+++`) and return normalized path.
15pub fn parse_diff_marker_path(line: &str) -> Option<String> {
16    let trimmed = line.trim_start();
17    if !(is_diff_old_file_marker_line(trimmed) || is_diff_new_file_marker_line(trimmed)) {
18        return None;
19    }
20    let path = trimmed.split_whitespace().nth(1)?;
21    if path == "/dev/null" {
22        return None;
23    }
24    Some(
25        path.trim_start_matches("a/")
26            .trim_start_matches("b/")
27            .to_string(),
28    )
29}
30
/// Derive a lowercase language hint from the file extension of `path`.
///
/// Yields `None` when the path has no extension, when the extension cannot be
/// viewed as UTF-8 text, or when the extension is empty.
pub fn language_hint_from_path(path: &str) -> Option<String> {
    let extension = Path::new(path).extension()?.to_str()?;
    if extension.is_empty() {
        return None;
    }
    Some(extension.to_ascii_lowercase())
}
39
/// Report whether `line` is an added-content line of a unified diff.
///
/// The line must begin with `+`; lines beginning with `+++` are treated as
/// file markers and rejected.
pub fn is_diff_addition_line(line: &str) -> bool {
    match line.strip_prefix('+') {
        // A leading `+` followed by `++` is the `+++` marker form.
        Some(rest) => !rest.starts_with("++"),
        None => false,
    }
}
44
/// Report whether `line` is a removed-content line of a unified diff.
///
/// The line must begin with `-`; lines beginning with `---` are treated as
/// file markers and rejected.
pub fn is_diff_deletion_line(line: &str) -> bool {
    match line.strip_prefix('-') {
        // A leading `-` followed by `--` is the `---` marker form.
        Some(rest) => !rest.starts_with("--"),
        None => false,
    }
}
49
/// Report whether `line` is the old-file marker of a unified diff, i.e. it
/// begins with `---` followed by a space.
pub fn is_diff_old_file_marker_line(line: &str) -> bool {
    line.strip_prefix("--- ").is_some()
}
54
/// Report whether `line` is the new-file marker of a unified diff, i.e. it
/// begins with `+++` followed by a space.
pub fn is_diff_new_file_marker_line(line: &str) -> bool {
    line.strip_prefix("+++ ").is_some()
}
59
/// Report whether `line` opens an apply_patch envelope or one of its per-file
/// operations (update, add, delete).
///
/// Matching is by line prefix only; the `*** End Patch` terminator is not
/// among the recognized prefixes.
pub fn is_apply_patch_header_line(line: &str) -> bool {
    const HEADER_PREFIXES: &[&str] = &[
        "*** Begin Patch",
        "*** Update File:",
        "*** Add File:",
        "*** Delete File:",
    ];
    HEADER_PREFIXES
        .iter()
        .any(|prefix| line.starts_with(*prefix))
}
67
68/// Whether a line is a recognized diff metadata/header line.
69pub fn is_diff_header_line(line: &str) -> bool {
70    line.starts_with("diff --git ")
71        || line.starts_with("@@")
72        || line.starts_with("index ")
73        || line.starts_with("new file mode ")
74        || line.starts_with("deleted file mode ")
75        || line.starts_with("rename from ")
76        || line.starts_with("rename to ")
77        || line.starts_with("copy from ")
78        || line.starts_with("copy to ")
79        || line.starts_with("similarity index ")
80        || line.starts_with("dissimilarity index ")
81        || line.starts_with("old mode ")
82        || line.starts_with("new mode ")
83        || line.starts_with("Binary files ")
84        || line.starts_with("\\ No newline at end of file")
85        || is_diff_new_file_marker_line(line)
86        || is_diff_old_file_marker_line(line)
87        || is_apply_patch_header_line(line)
88}
89
90/// Heuristic classifier for unified/git diff content.
91///
92/// This intentionally avoids classifying plain source code containing `+`/`-`
93/// lines as a diff unless there are structural diff markers.
94pub fn looks_like_diff_content(content: &str) -> bool {
95    let mut has_git_header = false;
96    let mut has_hunk = false;
97    let mut has_old_marker = false;
98    let mut has_new_marker = false;
99    let mut has_add = false;
100    let mut has_del = false;
101    let mut has_binary_or_mode_header = false;
102    let mut has_apply_patch = false;
103
104    for raw in content.lines() {
105        let line = raw.trim_start();
106        if line.is_empty() {
107            continue;
108        }
109
110        if line.starts_with("diff --git ") {
111            has_git_header = true;
112            continue;
113        }
114        if line.starts_with("@@") {
115            has_hunk = true;
116            continue;
117        }
118        if is_diff_old_file_marker_line(line) {
119            has_old_marker = true;
120            continue;
121        }
122        if is_diff_new_file_marker_line(line) {
123            has_new_marker = true;
124            continue;
125        }
126        if is_apply_patch_header_line(line) {
127            has_apply_patch = true;
128            continue;
129        }
130        if line.starts_with("new file mode ")
131            || line.starts_with("deleted file mode ")
132            || line.starts_with("rename from ")
133            || line.starts_with("rename to ")
134            || line.starts_with("copy from ")
135            || line.starts_with("copy to ")
136            || line.starts_with("similarity index ")
137            || line.starts_with("dissimilarity index ")
138            || line.starts_with("old mode ")
139            || line.starts_with("new mode ")
140            || line.starts_with("Binary files ")
141            || line.starts_with("index ")
142            || line.starts_with("\\ No newline at end of file")
143        {
144            has_binary_or_mode_header = true;
145            continue;
146        }
147
148        if is_diff_addition_line(line) {
149            has_add = true;
150            continue;
151        }
152        if is_diff_deletion_line(line) {
153            has_del = true;
154        }
155    }
156
157    if has_apply_patch {
158        return true;
159    }
160    if has_git_header && (has_hunk || has_old_marker || has_new_marker || has_binary_or_mode_header)
161    {
162        return true;
163    }
164    if has_hunk && (has_old_marker || has_new_marker || has_add || has_del) {
165        return true;
166    }
167    if has_old_marker && has_new_marker && (has_add || has_del) {
168        return true;
169    }
170
171    false
172}
173
/// Extract the starting line numbers from a unified diff hunk header.
///
/// The expected shape is
/// `@@ -<old_start>[,<old_len>] +<new_start>[,<new_len>] @@`, optionally
/// followed by more text. Only the two start numbers are returned, as
/// `(old_start, new_start)`; the lengths and everything after the new-range
/// token are ignored.
///
/// Returns `None` when the line does not begin with `@@ -`, when either range
/// token is missing, when the new range does not begin with `+`, or when a
/// start value is not a plain unsigned decimal number that fits in `usize`.
pub fn parse_hunk_starts(line: &str) -> Option<(usize, usize)> {
    // Parse the start number out of a `start[,len]` range token.
    fn range_start(range: &str) -> Option<usize> {
        let digits = range.split(',').next()?;
        // Integer parsing tolerates a leading `+`, which is never valid inside
        // a hunk range, so insist on ASCII digits only.
        if digits.is_empty() || !digits.bytes().all(|byte| byte.is_ascii_digit()) {
            return None;
        }
        digits.parse().ok()
    }

    let rest = line.strip_prefix("@@ -")?;
    let mut ranges = rest.split_whitespace();
    let old_range = ranges.next()?;
    // Exactly one `+` introduces the new range; repeated signs are malformed.
    let new_range = ranges.next()?.strip_prefix('+')?;
    Some((range_start(old_range)?, range_start(new_range)?))
}
194
195/// Normalize hunk header to start-only form: `@@ -old +new @@`.
196pub fn format_start_only_hunk_header(line: &str) -> Option<String> {
197    let (old_start, new_start) = parse_hunk_starts(line)?;
198    Some(format!("@@ -{} +{} @@", old_start, new_start))
199}
200
// Unit tests for the diff parsing and classification helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_git_diff_path() {
        let parsed = parse_diff_git_path("diff --git a/src/lib.rs b/src/lib.rs");
        assert_eq!(parsed, Some(String::from("src/lib.rs")));
    }

    #[test]
    fn parses_marker_path() {
        let new_file = parse_diff_marker_path("+++ b/src/main.rs");
        assert_eq!(new_file, Some(String::from("src/main.rs")));

        // `/dev/null` stands for "no file" and must not be reported as a path.
        assert!(parse_diff_marker_path("--- /dev/null").is_none());
    }

    #[test]
    fn infers_language_hint_from_extension() {
        // The extension is lowercased.
        let hint = language_hint_from_path("src/main.RS");
        assert_eq!(hint, Some(String::from("rs")));

        // No extension, no hint.
        assert!(language_hint_from_path("Makefile").is_none());
    }

    #[test]
    fn parses_hunk_starts() {
        let header = "@@ -536,4 +540,5 @@";
        assert_eq!(parse_hunk_starts(header), Some((536, 540)));
        assert!(parse_hunk_starts("not a hunk").is_none());
    }

    #[test]
    fn formats_start_only_hunk_header() {
        let formatted = format_start_only_hunk_header("@@ -536,4 +540,5 @@");
        assert_eq!(formatted.as_deref(), Some("@@ -536 +540 @@"));
    }

    #[test]
    fn detects_diff_add_remove_lines() {
        // Content lines are recognized.
        assert!(is_diff_addition_line("+added"));
        assert!(is_diff_deletion_line("-removed"));

        // File markers are not content lines.
        assert!(!is_diff_addition_line("+++ b/file.rs"));
        assert!(!is_diff_deletion_line("--- a/file.rs"));
    }

    #[test]
    fn detects_diff_header_lines() {
        for header in ["diff --git a/a b/a", "@@ -1 +1 @@", "+++ b/src/main.rs"] {
            assert!(is_diff_header_line(header), "expected header: {:?}", header);
        }

        // The prefix must sit at the start of the line, not inside source code.
        assert!(!is_diff_header_line("println!(\"diff --git\");"));
    }

    #[test]
    fn detects_marker_and_apply_patch_header_lines() {
        assert!(is_diff_old_file_marker_line("--- a/src/lib.rs"));
        assert!(is_diff_new_file_marker_line("+++ b/src/lib.rs"));

        assert!(is_apply_patch_header_line("*** Update File: src/lib.rs"));
        assert!(!is_apply_patch_header_line("*** End Patch"));
    }

    #[test]
    fn classifies_git_diff_content() {
        let git_diff = "diff --git a/a.rs b/a.rs\n@@ -1 +1 @@\n-old\n+new\n";
        assert!(looks_like_diff_content(git_diff));
    }

    #[test]
    fn classifies_apply_patch_content() {
        let envelope = "*** Begin Patch\n*** Update File: a.rs\n@@\n-old\n+new\n*** End Patch\n";
        assert!(looks_like_diff_content(envelope));
    }

    #[test]
    fn avoids_false_positive_for_regular_code() {
        let source =
            "fn delta(x: i32) -> i32 {\n    let y = x + 1;\n    let z = x - 1;\n    y + z\n}\n";
        assert!(!looks_like_diff_content(source));
    }

    #[test]
    fn avoids_false_positive_for_plus_minus_logs() {
        let log_output = "+ started service\n- previous pid cleaned\n";
        assert!(!looks_like_diff_content(log_output));
    }
}
296}