qlty_analysis/git/
diff.rs1use crate::code::FileIndex;
2use anyhow::Result;
3use git2::{Diff, DiffOptions, Repository};
4use ignore::{DirEntry, Walk, WalkBuilder};
5use qlty_config::issue_transformer::IssueTransformer;
6use qlty_types::analysis::v1::Issue;
7use std::cell::RefCell;
8use std::rc::Rc;
9use std::{
10 collections::HashSet,
11 path::{Path, PathBuf},
12};
13use tracing::{debug, trace, warn};
14
/// Origin marker that identifies an added line in a diff; compared against
/// `line.origin()` when building the added-lines index.
const PLUS: char = '+';
16
/// Selects which commit the working directory is compared against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffMode {
    /// Compare the working directory against the current `HEAD` commit.
    HeadToWorkdir,
    /// Compare the working directory against the merge base of `HEAD` and the
    /// given upstream revision (any string accepted by `revparse_single`).
    UpstreamToWorkdir(String),
}
21
22#[derive(Debug, Clone)]
23pub struct GitDiff {
24 pub changed_files: Vec<PathBuf>,
25 pub line_filter: DiffLineTransformer,
26}
27
28impl GitDiff {
29 pub fn compute(mode: DiffMode, path: &Path) -> Result<Self> {
30 let repository = Repository::open(path)?;
31 let head_commit = repository.head()?.peel_to_commit()?;
32
33 let commit = match mode {
34 DiffMode::HeadToWorkdir => head_commit,
35 DiffMode::UpstreamToWorkdir(ref upstream_ref) => {
36 let upstream_head = repository.revparse_single(upstream_ref)?.peel_to_commit()?;
37 let merge_base = repository.merge_base(upstream_head.id(), head_commit.id())?;
38 repository.find_commit(merge_base)?
39 }
40 };
41
42 debug!(
43 "Computing diff_tree_to_workdir_with_index for commit {}",
44 commit.id()
45 );
46
47 let diff = repository.diff_tree_to_workdir_with_index(
48 Some(&commit.tree()?),
49 Some(&mut Self::diff_options()),
50 )?;
51 let changed_files = Self::diff_to_paths(&diff, &repository)?;
52
53 debug!("Found {} changed files", changed_files.len());
54 trace!("Changed files: {:?}", changed_files);
55
56 let line_filter = DiffLineTransformer::new(Self::plus_lines_index(
57 &diff,
58 path.parent().unwrap().to_path_buf(),
59 )?);
60
61 Ok(Self {
62 changed_files,
63 line_filter,
64 })
65 }
66
67 fn plus_lines_index(diff: &git2::Diff, repo_path: PathBuf) -> Result<FileIndex> {
68 let index = Rc::new(RefCell::new(FileIndex::new()));
69
70 diff.foreach(
71 &mut |delta, _progress| {
72 if delta.status() == git2::Delta::Untracked {
73 if let Some(new_path) = delta.new_file().path() {
74 let absolute_path = repo_path.join(new_path);
76
77 if absolute_path.is_dir() {
78 if let Ok(files) = GitDiff::traverse_directory(absolute_path) {
80 for file in files {
81 let relative_path = file.strip_prefix(&repo_path).unwrap();
83 index.borrow_mut().insert_file(relative_path);
84 }
85 }
86 } else {
87 index.borrow_mut().insert_file(new_path);
88 }
89 }
90 }
91 true
92 },
93 None,
94 None,
95 Some(&mut |delta, _hunk, line| {
96 if line.origin() == PLUS {
97 if let Some(new_path) = delta.new_file().path() {
98 if let Some(new_lineno) = line.new_lineno() {
99 index.borrow_mut().insert_line(new_path, new_lineno);
100 }
101 }
102 }
103 true
104 }),
105 )?;
106
107 Ok(Rc::try_unwrap(index).unwrap().into_inner())
108 }
109
110 fn traverse_directory(path: PathBuf) -> Result<Vec<PathBuf>, std::io::Error> {
111 let mut files = Vec::new();
112
113 for entry in Self::walk_for_path(&path) {
114 let entry = entry.unwrap();
115
116 if let Some(file_type) = entry.file_type() {
117 if file_type.is_file() {
118 let relative_path = entry.path().to_path_buf();
119 files.push(relative_path);
120 }
121 } else {
122 warn!(
123 "Git diff returned a path that is neither a file nor a directory: {:?}",
124 entry.path()
125 );
126 }
127 }
128
129 Ok(files)
130 }
131
132 fn diff_options() -> DiffOptions {
133 let mut opts = DiffOptions::new();
134 opts.include_untracked(true);
135 opts
136 }
137
138 fn diff_to_paths(diff: &Diff, repository: &Repository) -> Result<Vec<PathBuf>> {
139 let delta_paths = Self::collect_delta_paths(diff);
140 let delta_file_paths = Self::collect_file_paths(&delta_paths, repository)?;
141
142 Ok(delta_file_paths.into_iter().collect())
143 }
144
145 fn collect_delta_paths(diff: &Diff) -> HashSet<PathBuf> {
146 let mut delta_paths = HashSet::new();
147
148 for delta in diff.deltas() {
149 if let Some(path) = delta.new_file().path() {
150 delta_paths.insert(path.to_owned());
151 }
152 }
153
154 delta_paths
155 }
156
157 fn collect_file_paths(
158 delta_paths: &HashSet<PathBuf>,
159 repository: &Repository,
160 ) -> Result<HashSet<PathBuf>> {
161 let mut delta_file_paths = HashSet::new();
162 let repository_work_dir = repository.workdir().unwrap();
163
164 for path in delta_paths {
165 let absolute_path = repository_work_dir.join(path);
166
167 if let Ok(metadata) = absolute_path.metadata() {
168 if metadata.is_dir() {
169 Self::collect_directory_paths(
170 &absolute_path,
171 repository,
172 &mut delta_file_paths,
173 )?;
174 } else {
175 delta_file_paths.insert(path.clone());
176 }
177 }
178 }
179
180 Ok(delta_file_paths)
181 }
182
183 fn collect_directory_paths(
184 absolute_path: &Path,
185 repository: &Repository,
186 delta_file_paths: &mut HashSet<PathBuf>,
187 ) -> Result<()> {
188 for entry in Self::walk_for_path(absolute_path).filter_map(Result::ok) {
189 Self::handle_directory_entry(entry, repository, delta_file_paths);
190 }
191
192 Ok(())
193 }
194
195 fn handle_directory_entry(
196 entry: DirEntry,
197 repository: &Repository,
198 delta_file_paths: &mut HashSet<PathBuf>,
199 ) {
200 if let Some(file_type) = entry.file_type() {
201 if file_type.is_file() {
202 if let Ok(relative_path) = Self::get_relative_path(entry.path(), repository) {
203 delta_file_paths.insert(relative_path);
204 }
205 }
206 } else {
207 warn!(
208 "Git diff returned a path that is neither a file nor a directory: {:?}",
209 entry.path()
210 );
211 }
212 }
213
214 fn get_relative_path(path: &Path, repository: &Repository) -> Result<PathBuf> {
215 let relative_path = path.strip_prefix(repository.workdir().unwrap())?;
216 Ok(relative_path.to_owned())
217 }
218
219 fn walk_for_path(path: &Path) -> Walk {
220 WalkBuilder::new(path)
221 .hidden(false) .build()
223 }
224}
225
226#[derive(Debug, Clone)]
228pub struct DiffLineTransformer {
229 index: FileIndex,
230}
231
232impl DiffLineTransformer {
233 pub fn new(index: FileIndex) -> Self {
234 Self { index }
235 }
236}
237
238impl IssueTransformer for DiffLineTransformer {
239 fn transform(&self, issue: Issue) -> Option<Issue> {
240 let issue_path = if let Some(path) = issue.path() {
241 path
242 } else {
243 let mut issue = issue;
245 issue.on_added_line = true;
246 return Some(issue);
247 };
248
249 if issue.location.as_ref().unwrap().range.is_none() {
250 if self.index.matches_path(&PathBuf::from(&issue_path)) {
251 let mut issue = issue;
252 issue.on_added_line = true;
253 return Some(issue);
254 } else {
255 return Some(issue);
256 }
257 }
258
259 if self
260 .index
261 .matches_line_range(&PathBuf::from(&issue_path), issue.range()?.line_range_u32())
262 {
263 let mut issue = issue;
264 issue.on_added_line = true;
265 Some(issue)
266 } else {
267 Some(issue)
268 }
269 }
270
271 fn clone_box(&self) -> Box<dyn IssueTransformer> {
272 Box::new(self.clone())
273 }
274}
275
/// Issue transformer that keeps only issues flagged with `on_added_line`.
#[derive(Debug, Clone)]
pub struct DiffLineFilter;
279
280impl IssueTransformer for DiffLineFilter {
281 fn transform(&self, issue: Issue) -> Option<Issue> {
282 if issue.on_added_line {
283 Some(issue)
284 } else {
285 None
286 }
287 }
288
289 fn clone_box(&self) -> Box<dyn IssueTransformer> {
290 Box::new(self.clone())
291 }
292}
293
#[cfg(test)]
mod test {
    use super::*;
    use itertools::Itertools;
    use qlty_test_utilities::git::sample_repo;
    use std::fs;
    use std::path::PathBuf;

    /// Untracked files (including ones inside a hidden directory) must show up
    /// as changed, while files excluded by a nested `.gitignore` must not.
    #[test]
    fn test_changed_files_respects_gitignore() -> Result<()> {
        let (td, repo) = sample_repo();
        let root = td.path();

        // Untracked file at the repository root.
        fs::write(root.join("new_file.me"), "This is some random content.").unwrap();

        // Untracked hidden directory whose own .gitignore excludes `bar`.
        let foo_path = root.join(".foo");
        fs::create_dir(&foo_path).unwrap();
        fs::write(foo_path.join(".gitignore"), "bar").unwrap();
        fs::write(foo_path.join("see.me"), "This is some random content.").unwrap();

        // Ignored sub-directory with a file that must not be reported.
        let bar_path = foo_path.join("bar");
        fs::create_dir(&bar_path).unwrap();
        fs::write(
            bar_path.join("ignore.me"),
            "This file should be ignored according to .gitignore.",
        )
        .unwrap();

        let git_diff = GitDiff::compute(DiffMode::HeadToWorkdir, repo.path())?;

        let sorted_paths: Vec<PathBuf> = git_diff.changed_files.iter().cloned().sorted().collect();
        let expected_paths = [
            PathBuf::from(".foo/.gitignore"),
            PathBuf::from(".foo/see.me"),
            PathBuf::from("new_file.me"),
        ];
        assert_eq!(sorted_paths, expected_paths);

        let index = &git_diff.line_filter.index;

        assert!(index.matches_line_range(&PathBuf::from("new_file.me"), 1..=1));
        assert!(index.matches_line_range(&PathBuf::from(".foo/see.me"), 1..=1));
        assert!(!index.matches_line_range(&PathBuf::from(".foo/bar/ignore.me"), 1..=1));

        Ok(())
    }
}