git_bot_feedback/
git_diff.rs1use regex::Regex;
2use std::{collections::HashMap, ops::Range, path::PathBuf};
3
4use crate::{FileDiffLines, FileFilter, LinesChangedOnly, error::DiffError};
5
/// The numeric fields of a unified-diff hunk header
/// (`@@ -old_start,old_lines +new_start,new_lines @@`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffHunkHeader {
    /// Line number at which the hunk starts in the old version of the file.
    pub old_start: u32,
    /// Number of lines the hunk covers in the old version of the file.
    pub old_lines: u32,
    /// Line number at which the hunk starts in the new version of the file.
    pub new_start: u32,
    /// Number of lines the hunk covers in the new version of the file.
    pub new_lines: u32,
}
17
18fn get_filename_from_front_matter(front_matter: &str) -> Result<Option<&str>, DiffError> {
19 let diff_file_name = Regex::new(r"(?m)^\+\+\+\sb?/(.*)$")?;
20 let diff_renamed_file = Regex::new(r"(?m)^rename to (.*)$")?;
21 let diff_binary_file = Regex::new(r"(?m)^Binary\sfiles\s")?;
22 if let Some(captures) = diff_file_name.captures(front_matter)
23 && let Some(name) = captures.get(1)
24 {
25 return Ok(Some(name.as_str()));
26 }
27 if front_matter.starts_with("similarity")
28 && let Some(captures) = diff_renamed_file.captures(front_matter)
29 && let Some(name) = captures.get(1)
30 {
31 return Ok(Some(name.as_str()));
32 }
33 if !diff_binary_file.is_match(front_matter) {
34 log::warn!("Unrecognized diff starting with:\n{}", front_matter);
35 }
36 Ok(None)
37}
38
/// Regular expression matching a unified-diff hunk header such as
/// `@@ -3,7 +3,7 @@` or the terse form `@@ -3 +3 @@`.
///
/// Capture group 1 is the starting line in the new file; group 2 is the number
/// of lines in the new file (empty when the header omits it).
///
/// The pattern is anchored to the start of a line (`^` in multi-line mode) so
/// that added or context lines which merely *contain* hunk-header-like text
/// (for example when the changed file is itself a patch or a diff fixture) are
/// not mistaken for real hunk headers.
static HUNK_INFO_PATTERN: &str = r"(?m)^@@\s\-\d+,?\d*\s\+(\d+),?(\d*)\s@@";
41
42fn parse_patch(patch: &str) -> Result<(Vec<u32>, Vec<Range<u32>>), DiffError> {
49 let mut diff_hunks = Vec::new();
50 let mut additions = Vec::new();
51
52 let hunk_info = Regex::new(HUNK_INFO_PATTERN)?;
53 let hunk_headers = hunk_info.captures_iter(patch).collect::<Vec<_>>();
54 if !hunk_headers.is_empty() {
55 let hunks = hunk_info.split(patch).skip(1);
57 for (hunk, header) in hunks.zip(hunk_headers) {
58 let [start_line, end_range] = header.extract().1.map(|v| v.parse::<u32>().unwrap_or(1));
60 let mut line_numb_in_diff = start_line;
61 diff_hunks.push(start_line..start_line + end_range);
62 for (line_index, line) in hunk.split('\n').enumerate() {
63 if line.starts_with('+') {
64 additions.push(line_numb_in_diff);
65 }
66 if line_index > 0 && !line.starts_with('-') {
67 line_numb_in_diff += 1;
68 }
69 }
70 }
71 }
72 Ok((additions, diff_hunks))
73}
74
75pub fn parse_diff(
82 diff: &str,
83 file_filter: &FileFilter,
84 lines_changed_only: &LinesChangedOnly,
85) -> Result<HashMap<String, FileDiffLines>, DiffError> {
86 let mut results = HashMap::new();
87 let diff_file_delimiter = Regex::new(r"(?m)^diff \-\-git a/.*$")?;
88 let hunk_info = Regex::new(HUNK_INFO_PATTERN)?;
89
90 let file_diffs = diff_file_delimiter.split(diff);
91 for file_diff in file_diffs {
92 if file_diff.is_empty() || file_diff.starts_with("deleted file") {
93 continue;
94 }
95 let hunk_start = if let Some(first_hunk) = hunk_info.find(file_diff) {
96 first_hunk.start()
97 } else {
98 file_diff.len()
99 };
100 let front_matter = &file_diff[..hunk_start];
101 if let Some(file_name) = get_filename_from_front_matter(front_matter.trim_start())? {
102 let file_name = file_name.strip_prefix('/').unwrap_or(file_name);
103 let file_path = PathBuf::from(file_name);
104 if file_filter.is_qualified(&file_path) {
105 let (added_lines, diff_hunks) = parse_patch(&file_diff[hunk_start..])?;
106 if lines_changed_only
107 .is_change_valid(!added_lines.is_empty(), !diff_hunks.is_empty())
108 {
109 results
110 .entry(file_name.to_string())
111 .or_insert_with(|| FileDiffLines::with_info(added_lines, diff_hunks));
112 }
113 }
114 }
115 }
116 Ok(results)
117}
118
#[cfg(test)]
mod test {
    // Tests may unwrap: a failed unwrap is simply a failed test.
    #![allow(clippy::unwrap_used)]

    use super::parse_diff;
    use crate::{FileFilter, LinesChangedOnly};

    /// A pure rename (no content change) followed by a new binary file.
    const RENAMED_DIFF: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
similarity index 100%
rename from /tests/demo/some source.cpp
rename to /tests/demo/some source.c
diff --git a/some picture.png b/some picture.png
new file mode 100644
Binary files /dev/null and b/some picture.png differ
"#;

    /// A pure rename is reported (with no lines) when all lines are of interest.
    #[test]
    fn parse_renamed_diff() {
        let files = parse_diff(
            RENAMED_DIFF,
            &FileFilter::new(&[], &["c"], None),
            &LinesChangedOnly::Off,
        )
        .unwrap();
        let git_file = files.get("tests/demo/some source.c").unwrap();
        assert!(git_file.added_lines.is_empty());
        assert!(git_file.diff_hunks.is_empty());
    }

    /// A pure rename has no hunks, so it is dropped when only diff hunks matter.
    #[test]
    fn parse_renamed_only_diff() {
        let files = parse_diff(
            RENAMED_DIFF,
            &FileFilter::new(&[], &["c"], None),
            &LinesChangedOnly::Diff,
        )
        .unwrap();
        assert!(files.is_empty());
    }

    /// A rename that also changes one line.
    ///
    /// This is deliberately NOT a raw string: the context lines are written with
    /// `\n` escapes, which a raw string would keep as literal backslash-n text.
    const RENAMED_DIFF_WITH_CHANGES: &str = "diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c\n\
        similarity index 99%\n\
        rename from /tests/demo/some source.cpp\n\
        rename to /tests/demo/some source.c\n\
        @@ -3,7 +3,7 @@\n \n \n \n\
        -#include \"math.h\"\n\
        +#include <math.h>\n \n \n \n";

    /// A renamed file with a patch is keyed by its new name and exposes its hunk.
    #[test]
    fn parse_renamed_diff_with_patch() {
        let files = parse_diff(
            &String::from_iter([RENAMED_DIFF_WITH_CHANGES, TERSE_HEADERS]),
            &FileFilter::new(&["src/*"], &["c", "cpp"], None),
            &LinesChangedOnly::On,
        )
        .unwrap();
        eprintln!("files: {files:#?}");
        let git_file = files.get("tests/demo/some source.c").unwrap();
        assert!(!git_file.is_line_in_diff(&1));
        assert!(git_file.is_line_in_diff(&4));
        // Only the renamed file's own hunk belongs to it; the hunks of the diff
        // appended after it must not leak into this entry.
        assert_eq!(git_file.diff_hunks, vec![3..10]);
    }

    /// An ordinary single-hunk modification.
    const TYPICAL_DIFF: &str = "diff --git a/path/for/Some file.cpp b/path/to/Some file.cpp\n\
        --- a/path/for/Some file.cpp\n\
        +++ b/path/to/Some file.cpp\n\
        @@ -3,7 +3,7 @@\n \n \n \n\
        -#include <some_lib/render/animation.hpp>\n\
        +#include <some_lib/render/animations.hpp>\n \n \n \n";

    #[test]
    fn parse_typical_diff() {
        let files = parse_diff(
            TYPICAL_DIFF,
            &FileFilter::new(&[], &["cpp"], None),
            &LinesChangedOnly::On,
        )
        .unwrap();
        assert!(!files.is_empty());
    }

    /// A newly added binary file; it has no textual hunks.
    const BINARY_DIFF: &str = "diff --git a/some picture.png b/some picture.png\n\
        new file mode 100644\n\
        Binary files /dev/null and b/some picture.png differ\n";

    #[test]
    fn parse_binary_diff() {
        let files = parse_diff(
            BINARY_DIFF,
            &FileFilter::new(&[], &["png"], None),
            &LinesChangedOnly::Diff,
        )
        .unwrap();
        assert!(files.is_empty());
    }

    /// Hunk headers in terse form: the line count is omitted when it is 1, and
    /// the last header carries a trailing section heading.
    const TERSE_HEADERS: &str = r#"diff --git a/src/demo.cpp b/src/demo.cpp
--- a/src/demo.cpp
+++ b/src/demo.cpp
@@ -3 +3 @@
-#include <stdio.h>
+#include "stdio.h"
@@ -4,0 +5,2 @@
+auto main() -> int
+{
@@ -18 +17,2 @@ int main(){
- return 0;}
+ return 0;
+}"#;

    /// An omitted line count in a hunk header is treated as one line.
    #[test]
    fn terse_hunk_header() {
        let file_filter = FileFilter::new(&[], &["cpp"], None);
        let files = parse_diff(TERSE_HEADERS, &file_filter, &LinesChangedOnly::Diff).unwrap();
        let file_diff = files.get("src/demo.cpp").unwrap();
        assert_eq!(file_diff.diff_hunks, vec![3..4, 5..7, 17..19]);
    }
}