git_bot_feedback/
git_diff.rs1use regex::Regex;
2use std::{collections::HashMap, ops::Range, path::PathBuf};
3
4use crate::{FileDiffLines, FileFilter, LinesChangedOnly};
5
/// The four numbers carried by a diff hunk header.
///
/// The field names suggest the unified diff layout
/// `@@ -old_start,old_lines +new_start,new_lines @@`.
/// NOTE(review): nothing in this part of the file constructs the struct, so
/// confirm the exact meaning of each field against its users.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffHunkHeader {
    /// Presumably the first line of the hunk in the old version of the file.
    pub old_start: u32,
    /// Presumably the number of lines the hunk spans in the old version.
    pub old_lines: u32,
    /// Presumably the first line of the hunk in the new version of the file.
    pub new_start: u32,
    /// Presumably the number of lines the hunk spans in the new version.
    pub new_lines: u32,
}
17
18fn get_filename_from_front_matter(front_matter: &str) -> Option<&str> {
19 let diff_file_name = Regex::new(r"(?m)^\+\+\+\sb?/(.*)$").unwrap();
20 let diff_renamed_file = Regex::new(r"(?m)^rename to (.*)$").unwrap();
21 let diff_binary_file = Regex::new(r"(?m)^Binary\sfiles\s").unwrap();
22 if let Some(captures) = diff_file_name.captures(front_matter) {
23 return Some(captures.get(1).unwrap().as_str());
24 }
25 if front_matter.starts_with("similarity")
26 && let Some(captures) = diff_renamed_file.captures(front_matter)
27 {
28 return Some(captures.get(1).unwrap().as_str());
29 }
30 if !diff_binary_file.is_match(front_matter) {
31 log::warn!("Unrecognized diff starting with:\n{}", front_matter);
32 }
33 None
34}
35
/// Regex source matching a unified diff hunk header such as `@@ -3,7 +3,8 @@`.
///
/// Capture group 1 is the start line on the `+` side and capture group 2 is
/// the line count on the `+` side (empty when the header omits it, as in
/// `@@ -3 +3 @@`).
///
/// The pattern is anchored to the start of a line so that header-looking text
/// inside a changed line (which always begins with `+`, `-` or a space) is not
/// mistaken for a real hunk header.
static HUNK_INFO_PATTERN: &str = r"(?m)^@@\s\-\d+,?\d*\s\+(\d+),?(\d*)\s@@";
38
39fn parse_patch(patch: &str) -> (Vec<u32>, Vec<Range<u32>>) {
46 let mut diff_hunks = Vec::new();
47 let mut additions = Vec::new();
48
49 let hunk_info = Regex::new(HUNK_INFO_PATTERN).unwrap();
50 let hunk_headers = hunk_info.captures_iter(patch).collect::<Vec<_>>();
51 if !hunk_headers.is_empty() {
52 let hunks = hunk_info.split(patch).skip(1);
54 for (hunk, header) in hunks.zip(hunk_headers) {
55 let [start_line, end_range] = header.extract().1.map(|v| v.parse::<u32>().unwrap_or(1));
57 let mut line_numb_in_diff = start_line;
58 diff_hunks.push(start_line..start_line + end_range);
59 for (line_index, line) in hunk.split('\n').enumerate() {
60 if line.starts_with('+') {
61 additions.push(line_numb_in_diff);
62 }
63 if line_index > 0 && !line.starts_with('-') {
64 line_numb_in_diff += 1;
65 }
66 }
67 }
68 }
69 (additions, diff_hunks)
70}
71
72pub fn parse_diff(
79 diff: &str,
80 file_filter: &FileFilter,
81 lines_changed_only: &LinesChangedOnly,
82) -> HashMap<String, FileDiffLines> {
83 let mut results = HashMap::new();
84 let diff_file_delimiter = Regex::new(r"(?m)^diff --git a/.*$").unwrap();
85 let hunk_info = Regex::new(HUNK_INFO_PATTERN).unwrap();
86
87 let file_diffs = diff_file_delimiter.split(diff);
88 for file_diff in file_diffs {
89 if file_diff.is_empty() || file_diff.starts_with("deleted file") {
90 continue;
91 }
92 let hunk_start = if let Some(first_hunk) = hunk_info.find(file_diff) {
93 first_hunk.start()
94 } else {
95 file_diff.len()
96 };
97 let front_matter = &file_diff[..hunk_start];
98 if let Some(file_name) = get_filename_from_front_matter(front_matter.trim_start()) {
99 let file_name = file_name.strip_prefix('/').unwrap_or(file_name);
100 let file_path = PathBuf::from(file_name);
101 if file_filter.is_not_ignored(&file_path) {
102 let (added_lines, diff_hunks) = parse_patch(&file_diff[hunk_start..]);
103 if lines_changed_only
104 .is_change_valid(!added_lines.is_empty(), !diff_hunks.is_empty())
105 {
106 results
107 .entry(file_name.to_string())
108 .or_insert_with(|| FileDiffLines::with_info(added_lines, diff_hunks));
109 }
110 }
111 }
112 }
113 results
114}
115
#[cfg(test)]
mod test {
    use super::parse_diff;
    use crate::{FileFilter, LinesChangedOnly};

    /// A pure rename (no content change) followed by a new binary file.
    const RENAMED_DIFF: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
similarity index 100%
rename from /tests/demo/some source.cpp
rename to /tests/demo/some source.c
diff --git a/some picture.png b/some picture.png
new file mode 100644
Binary files /dev/null and b/some picture.png differ
"#;

    /// A renamed file without hunks is still reported when all lines are of interest.
    #[test]
    fn parse_renamed_diff() {
        let files = parse_diff(
            RENAMED_DIFF,
            &FileFilter::new(&[], &["c"], None),
            &LinesChangedOnly::Off,
        );
        let git_file = files.get("tests/demo/some source.c").unwrap();
        assert!(git_file.added_lines.is_empty());
        assert!(git_file.diff_hunks.is_empty());
    }

    /// A renamed file without hunks is dropped when only diff lines are of interest.
    #[test]
    fn parse_renamed_only_diff() {
        let files = parse_diff(
            RENAMED_DIFF,
            &FileFilter::new(&[], &["c"], None),
            &LinesChangedOnly::Diff,
        );
        assert!(files.is_empty());
    }

    /// A rename that also changes content.
    ///
    /// NOTE(review): this is a raw string, so the `\n` sequences below are
    /// literal text rather than line breaks, and the fixture does not end
    /// with a line break. Confirm this is intended.
    const RENAMED_DIFF_WITH_CHANGES: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
similarity index 99%
rename from /tests/demo/some source.cpp
rename to /tests/demo/some source.c
@@ -3,7 +3,7 @@
\n \n \n-#include "math.h"
+#include <math.h>\n \n \n \n"#;

    /// The renamed file is reported under its new name with its hunk range.
    #[test]
    fn parse_renamed_diff_with_patch() {
        let files = parse_diff(
            &String::from_iter([RENAMED_DIFF_WITH_CHANGES, TERSE_HEADERS]),
            &FileFilter::new(&["src/*"], &["c", "cpp"], None),
            &LinesChangedOnly::On,
        );
        eprintln!("files: {files:#?}");
        let git_file = files.get("tests/demo/some source.c").unwrap();
        assert!(!git_file.is_line_in_diff(&1));
        assert!(git_file.is_line_in_diff(&4));
    }

    /// A modified file whose diff has `---`/`+++` lines and one hunk.
    const TYPICAL_DIFF: &str = "diff --git a/path/for/Some file.cpp b/path/to/Some file.cpp\n\
                            --- a/path/for/Some file.cpp\n\
                            +++ b/path/to/Some file.cpp\n\
                            @@ -3,7 +3,7 @@\n \n \n \n\
                            -#include <some_lib/render/animation.hpp>\n\
                            +#include <some_lib/render/animations.hpp>\n \n \n \n";

    /// The file is keyed by the name on its `+++` line and carries the hunk's range.
    #[test]
    fn parse_typical_diff() {
        let files = parse_diff(
            TYPICAL_DIFF,
            &FileFilter::new(&[], &["cpp"], None),
            &LinesChangedOnly::On,
        );
        assert!(!files.is_empty());
        let file_diff = files.get("path/to/Some file.cpp").unwrap();
        assert_eq!(file_diff.diff_hunks, vec![3..10]);
    }

    /// A newly added binary file; it has neither a `+++` line nor hunks.
    const BINARY_DIFF: &str = "diff --git a/some picture.png b/some picture.png\n\
                           new file mode 100644\n\
                           Binary files /dev/null and b/some picture.png differ\n";

    /// Binary files yield no entry.
    #[test]
    fn parse_binary_diff() {
        let files = parse_diff(
            BINARY_DIFF,
            &FileFilter::new(&[], &["png"], None),
            &LinesChangedOnly::Diff,
        );
        assert!(files.is_empty());
    }

    /// Hunk headers that omit the line counts (zero lines of context).
    const TERSE_HEADERS: &str = r#"diff --git a/src/demo.cpp b/src/demo.cpp
--- a/src/demo.cpp
+++ b/src/demo.cpp
@@ -3 +3 @@
-#include <stdio.h>
+#include "stdio.h"
@@ -4,0 +5,2 @@
+auto main() -> int
+{
@@ -18 +17,2 @@ int main(){
- return 0;}
+ return 0;
+}"#;

    /// An omitted line count in a hunk header is treated as a single line.
    #[test]
    fn terse_hunk_header() {
        let file_filter = FileFilter::new(&[], &["cpp"], None);
        let files = parse_diff(TERSE_HEADERS, &file_filter, &LinesChangedOnly::Diff);
        let file_diff = files.get("src/demo.cpp").unwrap();
        assert_eq!(file_diff.diff_hunks, vec![3..4, 5..7, 17..19]);
    }
}