1use std::{ops::RangeInclusive, path::PathBuf};
12
13use anyhow::{Context, Result};
14use git2::{Diff, Error, Patch, Repository};
16
17use crate::common_fs::{FileFilter, FileObj};
19
20pub fn open_repo(path: &str) -> Result<Repository, Error> {
26 Repository::open(PathBuf::from(path).as_path())
27}
28
29fn get_sha(repo: &Repository, depth: Option<u32>) -> Result<git2::Object<'_>, Error> {
34 match depth {
35 Some(int) => repo.revparse_single(format!("HEAD~{}", int).as_str()),
36 None => repo.revparse_single("HEAD"),
37 }
38}
39
40pub fn get_diff(repo: &Repository) -> Result<git2::Diff> {
50 let head = get_sha(repo, None).unwrap().peel_to_tree().unwrap();
51 let mut has_staged_files = false;
52 for entry in repo.statuses(None).unwrap().iter() {
53 if entry.status().bits()
54 & (git2::Status::INDEX_NEW.bits()
55 | git2::Status::INDEX_MODIFIED.bits()
56 | git2::Status::INDEX_RENAMED.bits())
57 > 0
58 {
59 has_staged_files = true;
60 break;
61 }
62 }
63
64 if has_staged_files {
65 repo.diff_tree_to_index(Some(&head), None, None)
67 .with_context(|| "Could not get diff for current changes in local repo index")
68 } else {
69 let base = get_sha(repo, Some(1)).unwrap().peel_to_tree().unwrap();
71 repo.diff_tree_to_tree(Some(&base), Some(&head), None)
72 .with_context(|| "Could not get diff for last commit")
73 }
74}
75
76fn parse_patch(patch: &Patch) -> (Vec<u32>, Vec<RangeInclusive<u32>>) {
81 let mut additions = Vec::new();
82 let mut diff_hunks = Vec::new();
83 for hunk_idx in 0..patch.num_hunks() {
84 let (hunk, line_count) = patch.hunk(hunk_idx).unwrap();
85 diff_hunks.push(RangeInclusive::new(
86 hunk.new_start(),
87 hunk.new_start() + hunk.new_lines(),
88 ));
89 for line in 0..line_count {
90 let diff_line = patch.line_in_hunk(hunk_idx, line).unwrap();
91 if diff_line.origin_value() == git2::DiffLineType::Addition {
92 additions.push(diff_line.new_lineno().unwrap());
93 }
94 }
95 }
96 (additions, diff_hunks)
97}
98
99pub fn parse_diff(diff: &git2::Diff, file_filter: &FileFilter) -> Vec<FileObj> {
104 let mut files: Vec<FileObj> = Vec::new();
105 for file_idx in 0..diff.deltas().count() {
106 let diff_delta = diff.get_delta(file_idx).unwrap();
107 let file_path = diff_delta.new_file().path().unwrap().to_path_buf();
108 if [
109 git2::Delta::Added,
110 git2::Delta::Modified,
111 git2::Delta::Renamed,
112 ]
113 .contains(&diff_delta.status())
114 && file_filter.is_source_or_ignored(&file_path)
115 {
116 let (added_lines, diff_chunks) =
117 parse_patch(&Patch::from_diff(diff, file_idx).unwrap().unwrap());
118 files.push(FileObj::from(file_path, added_lines, diff_chunks));
119 }
120 }
121 files
122}
123
124pub fn parse_diff_from_buf(buff: &[u8], file_filter: &FileFilter) -> Vec<FileObj> {
132 if let Ok(diff_obj) = &Diff::from_buffer(buff) {
133 parse_diff(diff_obj, file_filter)
134 } else {
135 log::warn!("libgit2 failed to parse the diff");
136 brute_force_parse_diff::parse_diff(&String::from_utf8_lossy(buff), file_filter)
137 }
138}
139
140mod brute_force_parse_diff {
141 use regex::Regex;
150 use std::{ops::RangeInclusive, path::PathBuf};
151
152 use crate::common_fs::{FileFilter, FileObj};
153
154 fn get_filename_from_front_matter(front_matter: &str) -> Option<&str> {
155 let diff_file_name = Regex::new(r"(?m)^\+\+\+\sb?/(.*)$").unwrap();
156 let diff_renamed_file = Regex::new(r"(?m)^rename to (.*)$").unwrap();
157 let diff_binary_file = Regex::new(r"(?m)^Binary\sfiles\s").unwrap();
158 if let Some(captures) = diff_file_name.captures(front_matter) {
159 return Some(captures.get(1).unwrap().as_str());
160 }
161 if front_matter.trim_start().starts_with("similarity") {
162 if let Some(captures) = diff_renamed_file.captures(front_matter) {
163 return Some(captures.get(1).unwrap().as_str());
164 }
165 }
166 if !diff_binary_file.is_match(front_matter) {
167 log::warn!("Unrecognized diff starting with:\n{}", front_matter);
168 }
169 None
170 }
171
172 static HUNK_INFO_PATTERN: &str = r"(?m)@@\s\-\d+,\d+\s\+(\d+,\d+)\s@@";
174
175 fn parse_patch(patch: &str) -> (Vec<u32>, Vec<RangeInclusive<u32>>) {
180 let mut diff_chunks = Vec::new();
181 let mut additions = Vec::new();
182
183 let hunk_info = Regex::new(HUNK_INFO_PATTERN).unwrap();
184 if let Some(hunk_headers) = hunk_info.captures(patch) {
185 for (index, (hunk, header)) in
186 hunk_info.split(patch).zip(hunk_headers.iter()).enumerate()
187 {
188 if index == 0 {
189 continue; }
191 let new_range: Vec<u32> = header
192 .unwrap()
193 .as_str()
194 .split(',')
195 .take(2)
196 .map(|val| val.parse::<u32>().unwrap())
197 .collect();
198 let start_line = new_range[0];
199 let end_range = new_range[1];
200 let mut line_numb_in_diff = start_line;
201 diff_chunks.push(RangeInclusive::new(start_line, start_line + end_range));
202 for (line_index, line) in hunk.split('\n').enumerate() {
203 if line.starts_with('+') {
204 additions.push(line_numb_in_diff);
205 }
206 if line_index > 0 && !line.starts_with('-') {
207 line_numb_in_diff += 1;
208 }
209 }
210 }
211 }
212 (additions, diff_chunks)
213 }
214
215 pub fn parse_diff(diff: &str, file_filter: &FileFilter) -> Vec<FileObj> {
216 log::error!("Using brute force diff parsing!");
217 let mut results = Vec::new();
218 let diff_file_delimiter = Regex::new(r"(?m)^diff --git a/.*$").unwrap();
219 let hunk_info = Regex::new(HUNK_INFO_PATTERN).unwrap();
220
221 let file_diffs = diff_file_delimiter.split(diff);
222 for file_diff in file_diffs {
223 if file_diff.is_empty() || file_diff.starts_with("deleted file") {
224 continue;
225 }
226 let hunk_start = if let Some(first_hunk) = hunk_info.find(file_diff) {
227 first_hunk.start()
228 } else {
229 file_diff.len()
230 };
231 let front_matter = &file_diff[..hunk_start];
232 if let Some(file_name) = get_filename_from_front_matter(front_matter) {
233 let file_path = PathBuf::from(file_name);
234 if file_filter.is_source_or_ignored(&file_path) {
235 let (added_lines, diff_chunks) = parse_patch(&file_diff[hunk_start..]);
236 results.push(FileObj::from(file_path, added_lines, diff_chunks));
237 }
238 }
239 }
244 results
245 }
246
247 #[cfg(test)]
249 mod test {
250
251 use super::parse_diff;
252 use crate::{
253 common_fs::{FileFilter, FileObj},
254 git::parse_diff_from_buf,
255 };
256
257 static RENAMED_DIFF: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
258similarity index 100%
259rename from /tests/demo/some source.cpp
260rename to /tests/demo/some source.c
261diff --git a/some picture.png b/some picture.png
262new file mode 100644
263Binary files /dev/null and b/some picture.png differ
264"#;
265
266 static RENAMED_DIFF_WITH_CHANGES: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
267similarity index 99%
268rename from /tests/demo/some source.cpp
269rename to /tests/demo/some source.c
270@@ -3,7 +3,7 @@
271\n \n \n-#include "iomanip"
272+#include <cstdlib>\n \n \n \n"#;
273
274 #[test]
275 fn parse_renamed_diff() {
276 let diff_buf = RENAMED_DIFF.as_bytes();
277 let files = parse_diff_from_buf(
278 diff_buf,
279 &FileFilter::new(&["target".to_string()], vec!["c".to_string()]),
280 );
281 assert!(!files.is_empty());
282 assert!(files
283 .first()
284 .unwrap()
285 .name
286 .ends_with("tests/demo/some source.c"));
287 }
288
289 #[test]
290 fn parse_renamed_diff_with_patch() {
291 let diff_buf = RENAMED_DIFF_WITH_CHANGES.as_bytes();
292 let files = parse_diff_from_buf(
293 diff_buf,
294 &FileFilter::new(&["target".to_string()], vec!["c".to_string()]),
295 );
296 assert!(!files.is_empty());
297 }
298
299 fn setup_parsed(buf: &str, extensions: &[String]) -> (Vec<FileObj>, Vec<FileObj>) {
302 let ignore = ["target".to_string()];
303 (
304 parse_diff_from_buf(
305 buf.as_bytes(),
306 &FileFilter::new(&ignore, extensions.to_owned()),
307 ),
308 parse_diff(buf, &FileFilter::new(&ignore, extensions.to_owned())),
309 )
310 }
311
312 fn assert_files_eq(files_from_a: &[FileObj], files_from_b: &[FileObj]) {
313 assert_eq!(files_from_a.len(), files_from_b.len());
314 for (a, b) in files_from_a.iter().zip(files_from_b) {
315 assert_eq!(a.name, b.name);
316 assert_eq!(a.added_lines, b.added_lines);
317 assert_eq!(a.added_ranges, b.added_ranges);
318 assert_eq!(a.diff_chunks, b.diff_chunks);
319 }
320 }
321
322 #[test]
323 fn parse_typical_diff() {
324 let diff_buf = "diff --git a/path/for/Some file.cpp b/path/to/Some file.cpp\n\
325 --- a/path/for/Some file.cpp\n\
326 +++ b/path/to/Some file.cpp\n\
327 @@ -3,7 +3,7 @@\n \n \n \n\
328 -#include <some_lib/render/animation.hpp>\n\
329 +#include <some_lib/render/animations.hpp>\n \n \n \n";
330
331 let (files_from_buf, files_from_str) = setup_parsed(diff_buf, &[String::from("cpp")]);
332 assert!(!files_from_buf.is_empty());
333 assert_files_eq(&files_from_buf, &files_from_str);
334 }
335
336 #[test]
337 fn parse_binary_diff() {
338 let diff_buf = "diff --git a/some picture.png b/some picture.png\n\
339 new file mode 100644\n\
340 Binary files /dev/null and b/some picture.png differ\n";
341
342 let (files_from_buf, files_from_str) = setup_parsed(diff_buf, &[String::from("png")]);
343 assert!(files_from_buf.is_empty());
344 assert_files_eq(&files_from_buf, &files_from_str);
345 }
346 }
347}
348
#[cfg(test)]
mod test {
    use std::{
        env::{self, current_dir, set_current_dir},
        fs::read,
    };

    use git2::build::CheckoutBuilder;
    use git2::{ApplyLocation, Diff, IndexAddOption, Repository};
    use tempfile::{tempdir, TempDir};

    use crate::{
        common_fs::{FileFilter, FileObj},
        rest_api::{github::GithubApiClient, RestApiClient},
    };

    /// Clones `url` into `path`, force-checks-out `sha` with a detached HEAD and,
    /// when `patch_path` is given, applies that patch to both the working tree and
    /// the index before staging the demo sources.
    fn clone_repo(url: &str, sha: &str, path: &str, patch_path: Option<&str>) {
        let repo = Repository::clone(url, path).unwrap();
        let commit = repo.revparse_single(sha).unwrap();
        let mut checkout_opts = CheckoutBuilder::new();
        checkout_opts.force().recreate_missing(true);
        repo.checkout_tree(&commit, Some(&mut checkout_opts))
            .unwrap();
        repo.set_head_detached(commit.id()).unwrap();

        let patch = match patch_path {
            Some(patch) => patch,
            None => return,
        };
        let patch_bytes = read(patch).unwrap();
        let diff = Diff::from_buffer(&patch_bytes).unwrap();
        repo.apply(&diff, ApplyLocation::Both, None).unwrap();
        let mut index = repo.index().unwrap();
        index
            .add_all(["tests/demo/demo.*"], IndexAddOption::DEFAULT, None)
            .unwrap();
        index.write().unwrap();
    }

    /// Creates a temporary directory and prints its location.
    fn get_temp_dir() -> TempDir {
        let tmp = tempdir().unwrap();
        println!("Using temp folder at {:?}", tmp.path());
        tmp
    }

    /// Clones the cpp-linter repo into `tmp` at `sha` (optionally patched), makes
    /// `tmp` the current directory and returns the changed files reported by the
    /// GitHub REST client for the given `extensions`.
    async fn checkout_cpp_linter_py_repo(
        sha: &str,
        extensions: &[String],
        tmp: &TempDir,
        patch_path: Option<&str>,
    ) -> Vec<FileObj> {
        let url = "https://github.com/cpp-linter/cpp-linter";
        let clone_dir = tmp.path().as_os_str().to_str().unwrap();
        clone_repo(url, sha, clone_dir, patch_path);
        let rest_api_client = GithubApiClient::new();
        let file_filter = FileFilter::new(&["target".to_string()], extensions.to_owned());
        set_current_dir(tmp).unwrap();
        env::set_var("CI", "false");
        let client = rest_api_client.unwrap();
        client
            .get_list_of_changed_files(&file_filter)
            .await
            .unwrap()
    }

    /// File extensions used by every test in this module.
    fn source_extensions() -> Vec<String> {
        vec!["cpp".to_string(), "hpp".to_string()]
    }

    /// Asserts that every file has an extension listed in `extensions`.
    fn assert_extensions_allowed(files: Vec<FileObj>, extensions: &[String]) {
        for file in files {
            let extension = file.name.extension().unwrap().to_string_lossy().to_string();
            assert!(extensions.contains(&extension));
        }
    }

    #[tokio::test]
    async fn with_no_changed_sources() {
        let sha = "0c236809891000b16952576dc34de082d7a40bf3";
        let cur_dir = current_dir().unwrap();
        let tmp = get_temp_dir();
        let extensions = source_extensions();
        let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, None).await;
        println!("files = {:?}", files);
        assert!(files.is_empty());
        // Leave the temp folder before it gets deleted.
        set_current_dir(cur_dir).unwrap();
        drop(tmp);
    }

    #[tokio::test]
    async fn with_changed_sources() {
        let sha = "950ff0b690e1903797c303c5fc8d9f3b52f1d3c5";
        let cur_dir = current_dir().unwrap();
        let tmp = get_temp_dir();
        let extensions = source_extensions();
        let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, None).await;
        println!("files = {:?}", files);
        assert!(files.len() >= 2);
        assert_extensions_allowed(files, &extensions);
        // Leave the temp folder before it gets deleted.
        set_current_dir(cur_dir).unwrap();
        drop(tmp);
    }

    #[tokio::test]
    async fn with_staged_changed_sources() {
        let sha = "0c236809891000b16952576dc34de082d7a40bf3";
        let cur_dir = current_dir().unwrap();
        let tmp = get_temp_dir();
        let extensions = source_extensions();
        let patch = "tests/git_status_test_assets/cpp-linter/cpp-linter/test_git_lib.patch";
        let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, Some(patch)).await;
        println!("files = {:?}", files);
        assert!(!files.is_empty());
        assert_extensions_allowed(files, &extensions);
        // Leave the temp folder before it gets deleted.
        set_current_dir(cur_dir).unwrap();
        drop(tmp);
    }
}