1use std::{ops::RangeInclusive, path::PathBuf};
12
13use anyhow::{Context, Result};
14use git2::{Diff, Error, Patch, Repository};
16
17use crate::{
19 cli::LinesChangedOnly,
20 common_fs::{FileFilter, FileObj},
21};
22
23pub fn open_repo(path: &str) -> Result<Repository, Error> {
29 Repository::open(PathBuf::from(path).as_path())
30}
31
32fn get_sha(repo: &Repository, depth: Option<u32>) -> Result<git2::Object<'_>, Error> {
37 match depth {
38 Some(int) => repo.revparse_single(format!("HEAD~{}", int).as_str()),
39 None => repo.revparse_single("HEAD"),
40 }
41}
42
43pub fn get_diff(repo: &'_ Repository) -> Result<git2::Diff<'_>> {
53 let head = get_sha(repo, None).unwrap().peel_to_tree().unwrap();
54 let mut has_staged_files = false;
55 for entry in repo.statuses(None).unwrap().iter() {
56 if entry.status().bits()
57 & (git2::Status::INDEX_NEW.bits()
58 | git2::Status::INDEX_MODIFIED.bits()
59 | git2::Status::INDEX_RENAMED.bits())
60 > 0
61 {
62 has_staged_files = true;
63 break;
64 }
65 }
66
67 if has_staged_files {
72 repo.diff_tree_to_index(Some(&head), None, None)
74 .with_context(|| "Could not get diff for current changes in local repo index")
75 } else {
76 let base = get_sha(repo, Some(1)).unwrap().peel_to_tree().unwrap();
78 repo.diff_tree_to_tree(Some(&base), Some(&head), None)
79 .with_context(|| "Could not get diff for last commit")
80 }
81}
82
83fn parse_patch(patch: &Patch) -> (Vec<u32>, Vec<RangeInclusive<u32>>) {
88 let mut additions = Vec::new();
89 let mut diff_hunks = Vec::new();
90 for hunk_idx in 0..patch.num_hunks() {
91 let (hunk, line_count) = patch.hunk(hunk_idx).unwrap();
92 diff_hunks.push(RangeInclusive::new(
93 hunk.new_start(),
94 hunk.new_start() + hunk.new_lines(),
95 ));
96 for line in 0..line_count {
97 let diff_line = patch.line_in_hunk(hunk_idx, line).unwrap();
98 if diff_line.origin_value() == git2::DiffLineType::Addition {
99 additions.push(diff_line.new_lineno().unwrap());
100 }
101 }
102 }
103 (additions, diff_hunks)
104}
105
106pub fn parse_diff(
111 diff: &git2::Diff,
112 file_filter: &FileFilter,
113 lines_changed_only: &LinesChangedOnly,
114) -> Vec<FileObj> {
115 let mut files: Vec<FileObj> = Vec::new();
116 for file_idx in 0..diff.deltas().count() {
117 let diff_delta = diff.get_delta(file_idx).unwrap();
118 let file_path = diff_delta.new_file().path().unwrap().to_path_buf();
119 if matches!(
120 diff_delta.status(),
121 git2::Delta::Added | git2::Delta::Modified | git2::Delta::Renamed,
122 ) && file_filter.is_source_or_ignored(&file_path)
123 {
124 let (added_lines, diff_chunks) =
125 parse_patch(&Patch::from_diff(diff, file_idx).unwrap().unwrap());
126 if lines_changed_only.is_change_valid(!added_lines.is_empty(), !diff_chunks.is_empty())
127 {
128 files.push(FileObj::from(file_path, added_lines, diff_chunks));
129 }
130 }
131 }
132 files
133}
134
135pub fn parse_diff_from_buf(
143 buff: &[u8],
144 file_filter: &FileFilter,
145 lines_changed_only: &LinesChangedOnly,
146) -> Vec<FileObj> {
147 if let Ok(diff_obj) = &Diff::from_buffer(buff) {
148 parse_diff(diff_obj, file_filter, lines_changed_only)
149 } else {
150 log::warn!("libgit2 failed to parse the diff");
151 brute_force_parse_diff::parse_diff(
152 &String::from_utf8_lossy(buff),
153 file_filter,
154 lines_changed_only,
155 )
156 }
157}
158
159mod brute_force_parse_diff {
160 use regex::Regex;
169 use std::{ops::RangeInclusive, path::PathBuf};
170
171 use crate::{
172 cli::LinesChangedOnly,
173 common_fs::{FileFilter, FileObj},
174 };
175
176 fn get_filename_from_front_matter(front_matter: &str) -> Option<&str> {
177 let diff_file_name = Regex::new(r"(?m)^\+\+\+\sb?/(.*)$").unwrap();
178 let diff_renamed_file = Regex::new(r"(?m)^rename to (.*)$").unwrap();
179 let diff_binary_file = Regex::new(r"(?m)^Binary\sfiles\s").unwrap();
180 if let Some(captures) = diff_file_name.captures(front_matter) {
181 return Some(captures.get(1).unwrap().as_str());
182 }
183 if front_matter.trim_start().starts_with("similarity") {
184 if let Some(captures) = diff_renamed_file.captures(front_matter) {
185 return Some(captures.get(1).unwrap().as_str());
186 }
187 }
188 if !diff_binary_file.is_match(front_matter) {
189 log::warn!("Unrecognized diff starting with:\n{}", front_matter);
190 }
191 None
192 }
193
194 static HUNK_INFO_PATTERN: &str = r"(?m)@@\s\-\d+,\d+\s\+(\d+,\d+)\s@@";
196
197 fn parse_patch(patch: &str) -> (Vec<u32>, Vec<RangeInclusive<u32>>) {
202 let mut diff_chunks = Vec::new();
203 let mut additions = Vec::new();
204
205 let hunk_info = Regex::new(HUNK_INFO_PATTERN).unwrap();
206 if let Some(hunk_headers) = hunk_info.captures(patch) {
207 for (index, (hunk, header)) in
208 hunk_info.split(patch).zip(hunk_headers.iter()).enumerate()
209 {
210 if index == 0 {
211 continue; }
213 let new_range: Vec<u32> = header
214 .unwrap()
215 .as_str()
216 .split(',')
217 .take(2)
218 .map(|val| val.parse::<u32>().unwrap())
219 .collect();
220 let start_line = new_range[0];
221 let end_range = new_range[1];
222 let mut line_numb_in_diff = start_line;
223 diff_chunks.push(RangeInclusive::new(start_line, start_line + end_range));
224 for (line_index, line) in hunk.split('\n').enumerate() {
225 if line.starts_with('+') {
226 additions.push(line_numb_in_diff);
227 }
228 if line_index > 0 && !line.starts_with('-') {
229 line_numb_in_diff += 1;
230 }
231 }
232 }
233 }
234 (additions, diff_chunks)
235 }
236
237 pub fn parse_diff(
238 diff: &str,
239 file_filter: &FileFilter,
240 lines_changed_only: &LinesChangedOnly,
241 ) -> Vec<FileObj> {
242 log::error!("Using brute force diff parsing!");
243 let mut results = Vec::new();
244 let diff_file_delimiter = Regex::new(r"(?m)^diff --git a/.*$").unwrap();
245 let hunk_info = Regex::new(HUNK_INFO_PATTERN).unwrap();
246
247 let file_diffs = diff_file_delimiter.split(diff);
248 for file_diff in file_diffs {
249 if file_diff.is_empty() || file_diff.starts_with("deleted file") {
250 continue;
251 }
252 let hunk_start = if let Some(first_hunk) = hunk_info.find(file_diff) {
253 first_hunk.start()
254 } else {
255 file_diff.len()
256 };
257 let front_matter = &file_diff[..hunk_start];
258 if let Some(file_name) = get_filename_from_front_matter(front_matter) {
259 let file_path = PathBuf::from(file_name);
260 if file_filter.is_source_or_ignored(&file_path) {
261 let (added_lines, diff_chunks) = parse_patch(&file_diff[hunk_start..]);
262 if lines_changed_only
263 .is_change_valid(!added_lines.is_empty(), !diff_chunks.is_empty())
264 {
265 results.push(FileObj::from(file_path, added_lines, diff_chunks));
266 }
267 }
268 }
269 }
274 results
275 }
276
277 #[cfg(test)]
279 mod test {
280
281 use super::parse_diff;
282 use crate::{
283 cli::LinesChangedOnly,
284 common_fs::{FileFilter, FileObj},
285 git::parse_diff_from_buf,
286 };
287
// Diff fixture with two entries and no hunks: a file renamed from `.cpp` to `.c` with
// 100% similarity, followed by a newly added binary file.
static RENAMED_DIFF: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
similarity index 100%
rename from /tests/demo/some source.cpp
rename to /tests/demo/some source.c
diff --git a/some picture.png b/some picture.png
new file mode 100644
Binary files /dev/null and b/some picture.png differ
"#;
296
// Diff fixture: a file renamed from `.cpp` to `.c` with 99% similarity and one hunk.
// NOTE: this is a raw string, so every `\n` inside the hunk below is a literal backslash
// followed by `n`, not a line break.
static RENAMED_DIFF_WITH_CHANGES: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
similarity index 99%
rename from /tests/demo/some source.cpp
rename to /tests/demo/some source.c
@@ -3,7 +3,7 @@
\n \n \n-#include "iomanip"
+#include <cstdlib>\n \n \n \n"#;
304
/// A pure rename must be reported under the file's new name.
#[test]
fn parse_renamed_diff() {
    let ignored = [String::from("target")];
    let filter = FileFilter::new(&ignored, vec![String::from("c")]);
    let parsed = parse_diff_from_buf(RENAMED_DIFF.as_bytes(), &filter, &LinesChangedOnly::Off);
    assert!(!parsed.is_empty());
    let renamed = parsed.first().unwrap();
    assert!(renamed.name.ends_with("tests/demo/some source.c"));
}
320
/// A rename that also carries a hunk must still produce a file entry.
#[test]
fn parse_renamed_diff_with_patch() {
    let ignored = [String::from("target")];
    let filter = FileFilter::new(&ignored, vec![String::from("c")]);
    let parsed = parse_diff_from_buf(
        RENAMED_DIFF_WITH_CHANGES.as_bytes(),
        &filter,
        &LinesChangedOnly::Off,
    );
    assert!(!parsed.is_empty());
}
331
332 fn setup_parsed(buf: &str, extensions: &[String]) -> (Vec<FileObj>, Vec<FileObj>) {
335 let ignore = ["target".to_string()];
336 (
337 parse_diff_from_buf(
338 buf.as_bytes(),
339 &FileFilter::new(&ignore, extensions.to_owned()),
340 &LinesChangedOnly::Off,
341 ),
342 parse_diff(
343 buf,
344 &FileFilter::new(&ignore, extensions.to_owned()),
345 &LinesChangedOnly::Off,
346 ),
347 )
348 }
349
350 fn assert_files_eq(files_from_a: &[FileObj], files_from_b: &[FileObj]) {
351 assert_eq!(files_from_a.len(), files_from_b.len());
352 for (a, b) in files_from_a.iter().zip(files_from_b) {
353 assert_eq!(a.name, b.name);
354 assert_eq!(a.added_lines, b.added_lines);
355 assert_eq!(a.added_ranges, b.added_ranges);
356 assert_eq!(a.diff_chunks, b.diff_chunks);
357 }
358 }
359
/// Both parsers must agree on an ordinary one-hunk diff of a source file.
#[test]
fn parse_typical_diff() {
    let diff_buf = concat!(
        "diff --git a/path/for/Some file.cpp b/path/to/Some file.cpp\n",
        "--- a/path/for/Some file.cpp\n",
        "+++ b/path/to/Some file.cpp\n",
        "@@ -3,7 +3,7 @@\n \n \n \n",
        "-#include <some_lib/render/animation.hpp>\n",
        "+#include <some_lib/render/animations.hpp>\n \n \n \n",
    );

    let accepted_extensions = [String::from("cpp")];
    let (files_from_buf, files_from_str) = setup_parsed(diff_buf, &accepted_extensions);
    assert!(!files_from_buf.is_empty());
    assert_files_eq(&files_from_buf, &files_from_str);
}
373
/// A diff that only adds a binary file must yield no entries from either parser.
#[test]
fn parse_binary_diff() {
    let diff_buf = concat!(
        "diff --git a/some picture.png b/some picture.png\n",
        "new file mode 100644\n",
        "Binary files /dev/null and b/some picture.png differ\n",
    );

    let accepted_extensions = [String::from("png")];
    let (files_from_buf, files_from_str) = setup_parsed(diff_buf, &accepted_extensions);
    assert!(files_from_buf.is_empty());
    assert_files_eq(&files_from_buf, &files_from_str);
}
384 }
385}
386
387#[cfg(test)]
388mod test {
389 use std::{
390 env::{self, current_dir, set_current_dir},
391 fs::read,
392 };
393
394 use git2::build::CheckoutBuilder;
395 use git2::{ApplyLocation, Diff, IndexAddOption, Repository};
396
397 fn clone_repo(url: &str, sha: &str, path: &str, patch_path: Option<&str>) {
399 let repo = Repository::clone(url, path).unwrap();
400 let commit = repo.revparse_single(sha).unwrap();
401 repo.checkout_tree(
402 &commit,
403 Some(CheckoutBuilder::new().force().recreate_missing(true)),
404 )
405 .unwrap();
406 repo.set_head_detached(commit.id()).unwrap();
407 if let Some(patch) = patch_path {
408 let diff = Diff::from_buffer(&read(patch).unwrap()).unwrap();
409 repo.apply(&diff, ApplyLocation::Both, None).unwrap();
410 let mut index = repo.index().unwrap();
411 index
412 .add_all(["tests/demo/demo.*"], IndexAddOption::DEFAULT, None)
413 .unwrap();
414 index.write().unwrap();
415 }
416 }
417
418 use tempfile::{tempdir, TempDir};
419
420 use crate::{
421 cli::LinesChangedOnly,
422 common_fs::FileFilter,
423 rest_api::{github::GithubApiClient, RestApiClient},
424 };
425
426 fn get_temp_dir() -> TempDir {
427 let tmp = tempdir().unwrap();
428 println!("Using temp folder at {:?}", tmp.path());
429 tmp
430 }
431
432 async fn checkout_cpp_linter_py_repo(
433 sha: &str,
434 extensions: &[String],
435 tmp: &TempDir,
436 patch_path: Option<&str>,
437 ) -> Vec<crate::common_fs::FileObj> {
438 let url = "https://github.com/cpp-linter/cpp-linter";
439 clone_repo(
440 url,
441 sha,
442 tmp.path().as_os_str().to_str().unwrap(),
443 patch_path,
444 );
445 let rest_api_client = GithubApiClient::new();
446 let file_filter = FileFilter::new(&["target".to_string()], extensions.to_owned());
447 set_current_dir(tmp).unwrap();
448 env::set_var("CI", "false"); rest_api_client
450 .unwrap()
451 .get_list_of_changed_files(&file_filter, &LinesChangedOnly::Off)
452 .await
453 .unwrap()
454 }
455
456 #[tokio::test]
457 async fn with_no_changed_sources() {
458 let sha = "0c236809891000b16952576dc34de082d7a40bf3";
460 let cur_dir = current_dir().unwrap();
461 let tmp = get_temp_dir();
462 let extensions = vec!["cpp".to_string(), "hpp".to_string()];
463 let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, None).await;
464 println!("files = {:?}", files);
465 assert!(files.is_empty());
466 set_current_dir(cur_dir).unwrap(); drop(tmp); }
469
470 #[tokio::test]
471 async fn with_changed_sources() {
472 let sha = "950ff0b690e1903797c303c5fc8d9f3b52f1d3c5";
474 let cur_dir = current_dir().unwrap();
475 let tmp = get_temp_dir();
476 let extensions = vec!["cpp".to_string(), "hpp".to_string()];
477 let files = checkout_cpp_linter_py_repo(sha, &extensions.clone(), &tmp, None).await;
478 println!("files = {:?}", files);
479 assert!(files.len() >= 2);
480 for file in files {
481 assert!(
482 extensions.contains(&file.name.extension().unwrap().to_string_lossy().to_string())
483 );
484 }
485 set_current_dir(cur_dir).unwrap(); drop(tmp); }
488
489 #[tokio::test]
490 async fn with_staged_changed_sources() {
491 let sha = "0c236809891000b16952576dc34de082d7a40bf3";
493 let cur_dir = current_dir().unwrap();
494 let tmp = get_temp_dir();
495 let extensions = vec!["cpp".to_string(), "hpp".to_string()];
496 let files = checkout_cpp_linter_py_repo(
497 sha,
498 &extensions.clone(),
499 &tmp,
500 Some("tests/git_status_test_assets/cpp-linter/cpp-linter/test_git_lib.patch"),
501 )
502 .await;
503 println!("files = {:?}", files);
504 assert!(!files.is_empty());
505 for file in files {
506 assert!(
507 extensions.contains(&file.name.extension().unwrap().to_string_lossy().to_string())
508 );
509 }
510 set_current_dir(cur_dir).unwrap(); drop(tmp); }
513}