qlty_analysis/git/
diff.rs

1use crate::code::FileIndex;
2use anyhow::Result;
3use git2::{Diff, DiffOptions, Repository};
4use ignore::{DirEntry, Walk, WalkBuilder};
5use qlty_config::issue_transformer::IssueTransformer;
6use qlty_types::analysis::v1::Issue;
7use std::cell::RefCell;
8use std::rc::Rc;
9use std::{
10    collections::HashSet,
11    path::{Path, PathBuf},
12};
13use tracing::{debug, trace, warn};
14
/// Line-origin marker compared against `git2::DiffLine::origin()` to recognise added lines.
const PLUS: char = '+';
16
/// Selects the commit that the working directory is diffed against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffMode {
    /// Diff against the commit `HEAD` currently points to.
    HeadToWorkdir,
    /// Diff against the merge base of `HEAD` and the given upstream revision
    /// (the string is resolved with `Repository::revparse_single`).
    UpstreamToWorkdir(String),
}
21
22#[derive(Debug, Clone)]
23pub struct GitDiff {
24    pub changed_files: Vec<PathBuf>,
25    pub line_filter: DiffLineTransformer,
26}
27
28impl GitDiff {
29    pub fn compute(mode: DiffMode, path: &Path) -> Result<Self> {
30        let repository = Repository::open(path)?;
31        let head_commit = repository.head()?.peel_to_commit()?;
32
33        let commit = match mode {
34            DiffMode::HeadToWorkdir => head_commit,
35            DiffMode::UpstreamToWorkdir(ref upstream_ref) => {
36                let upstream_head = repository.revparse_single(upstream_ref)?.peel_to_commit()?;
37                let merge_base = repository.merge_base(upstream_head.id(), head_commit.id())?;
38                repository.find_commit(merge_base)?
39            }
40        };
41
42        debug!(
43            "Computing diff_tree_to_workdir_with_index for commit {}",
44            commit.id()
45        );
46
47        let diff = repository.diff_tree_to_workdir_with_index(
48            Some(&commit.tree()?),
49            Some(&mut Self::diff_options()),
50        )?;
51        let changed_files = Self::diff_to_paths(&diff, &repository)?;
52
53        debug!("Found {} changed files", changed_files.len());
54        trace!("Changed files: {:?}", changed_files);
55
56        let line_filter = DiffLineTransformer::new(Self::plus_lines_index(
57            &diff,
58            path.parent().unwrap().to_path_buf(),
59        )?);
60
61        Ok(Self {
62            changed_files,
63            line_filter,
64        })
65    }
66
67    fn plus_lines_index(diff: &git2::Diff, repo_path: PathBuf) -> Result<FileIndex> {
68        let index = Rc::new(RefCell::new(FileIndex::new()));
69
70        diff.foreach(
71            &mut |delta, _progress| {
72                if delta.status() == git2::Delta::Untracked {
73                    if let Some(new_path) = delta.new_file().path() {
74                        // Construct the absolute path for checking fs
75                        let absolute_path = repo_path.join(new_path);
76
77                        if absolute_path.is_dir() {
78                            // If it's a directory, traverse it to get all files
79                            if let Ok(files) = GitDiff::traverse_directory(absolute_path) {
80                                for file in files {
81                                    // Convert back to a relative path
82                                    let relative_path = file.strip_prefix(&repo_path).unwrap();
83                                    index.borrow_mut().insert_file(relative_path);
84                                }
85                            }
86                        } else {
87                            index.borrow_mut().insert_file(new_path);
88                        }
89                    }
90                }
91                true
92            },
93            None,
94            None,
95            Some(&mut |delta, _hunk, line| {
96                if line.origin() == PLUS {
97                    if let Some(new_path) = delta.new_file().path() {
98                        if let Some(new_lineno) = line.new_lineno() {
99                            index.borrow_mut().insert_line(new_path, new_lineno);
100                        }
101                    }
102                }
103                true
104            }),
105        )?;
106
107        Ok(Rc::try_unwrap(index).unwrap().into_inner())
108    }
109
110    fn traverse_directory(path: PathBuf) -> Result<Vec<PathBuf>, std::io::Error> {
111        let mut files = Vec::new();
112
113        for entry in Self::walk_for_path(&path) {
114            let entry = entry.unwrap();
115
116            if let Some(file_type) = entry.file_type() {
117                if file_type.is_file() {
118                    let relative_path = entry.path().to_path_buf();
119                    files.push(relative_path);
120                }
121            } else {
122                warn!(
123                    "Git diff returned a path that is neither a file nor a directory: {:?}",
124                    entry.path()
125                );
126            }
127        }
128
129        Ok(files)
130    }
131
132    fn diff_options() -> DiffOptions {
133        let mut opts = DiffOptions::new();
134        opts.include_untracked(true);
135        opts
136    }
137
138    fn diff_to_paths(diff: &Diff, repository: &Repository) -> Result<Vec<PathBuf>> {
139        let delta_paths = Self::collect_delta_paths(diff);
140        let delta_file_paths = Self::collect_file_paths(&delta_paths, repository)?;
141
142        Ok(delta_file_paths.into_iter().collect())
143    }
144
145    fn collect_delta_paths(diff: &Diff) -> HashSet<PathBuf> {
146        let mut delta_paths = HashSet::new();
147
148        for delta in diff.deltas() {
149            if let Some(path) = delta.new_file().path() {
150                delta_paths.insert(path.to_owned());
151            }
152        }
153
154        delta_paths
155    }
156
157    fn collect_file_paths(
158        delta_paths: &HashSet<PathBuf>,
159        repository: &Repository,
160    ) -> Result<HashSet<PathBuf>> {
161        let mut delta_file_paths = HashSet::new();
162        let repository_work_dir = repository.workdir().unwrap();
163
164        for path in delta_paths {
165            let absolute_path = repository_work_dir.join(path);
166
167            if let Ok(metadata) = absolute_path.metadata() {
168                if metadata.is_dir() {
169                    Self::collect_directory_paths(
170                        &absolute_path,
171                        repository,
172                        &mut delta_file_paths,
173                    )?;
174                } else {
175                    delta_file_paths.insert(path.clone());
176                }
177            }
178        }
179
180        Ok(delta_file_paths)
181    }
182
183    fn collect_directory_paths(
184        absolute_path: &Path,
185        repository: &Repository,
186        delta_file_paths: &mut HashSet<PathBuf>,
187    ) -> Result<()> {
188        for entry in Self::walk_for_path(absolute_path).filter_map(Result::ok) {
189            Self::handle_directory_entry(entry, repository, delta_file_paths);
190        }
191
192        Ok(())
193    }
194
195    fn handle_directory_entry(
196        entry: DirEntry,
197        repository: &Repository,
198        delta_file_paths: &mut HashSet<PathBuf>,
199    ) {
200        if let Some(file_type) = entry.file_type() {
201            if file_type.is_file() {
202                if let Ok(relative_path) = Self::get_relative_path(entry.path(), repository) {
203                    delta_file_paths.insert(relative_path);
204                }
205            }
206        } else {
207            warn!(
208                "Git diff returned a path that is neither a file nor a directory: {:?}",
209                entry.path()
210            );
211        }
212    }
213
214    fn get_relative_path(path: &Path, repository: &Repository) -> Result<PathBuf> {
215        let relative_path = path.strip_prefix(repository.workdir().unwrap())?;
216        Ok(relative_path.to_owned())
217    }
218
219    fn walk_for_path(path: &Path) -> Walk {
220        WalkBuilder::new(path)
221            .hidden(false) // Do not ignore hidden files
222            .build()
223    }
224}
225
226/// Mark issues that are on added lines
227#[derive(Debug, Clone)]
228pub struct DiffLineTransformer {
229    index: FileIndex,
230}
231
232impl DiffLineTransformer {
233    pub fn new(index: FileIndex) -> Self {
234        Self { index }
235    }
236}
237
238impl IssueTransformer for DiffLineTransformer {
239    fn transform(&self, issue: Issue) -> Option<Issue> {
240        let issue_path = if let Some(path) = issue.path() {
241            path
242        } else {
243            // TODO: Issues without a path are not filterable
244            let mut issue = issue;
245            issue.on_added_line = true;
246            return Some(issue);
247        };
248
249        if issue.location.as_ref().unwrap().range.is_none() {
250            if self.index.matches_path(&PathBuf::from(&issue_path)) {
251                let mut issue = issue;
252                issue.on_added_line = true;
253                return Some(issue);
254            } else {
255                return Some(issue);
256            }
257        }
258
259        if self
260            .index
261            .matches_line_range(&PathBuf::from(&issue_path), issue.range()?.line_range_u32())
262        {
263            let mut issue = issue;
264            issue.on_added_line = true;
265            Some(issue)
266        } else {
267            Some(issue)
268        }
269    }
270
271    fn clone_box(&self) -> Box<dyn IssueTransformer> {
272        Box::new(self.clone())
273    }
274}
275
276/// Filter out issues that are not on added lines
277#[derive(Debug, Clone)]
278pub struct DiffLineFilter;
279
280impl IssueTransformer for DiffLineFilter {
281    fn transform(&self, issue: Issue) -> Option<Issue> {
282        if issue.on_added_line {
283            Some(issue)
284        } else {
285            None
286        }
287    }
288
289    fn clone_box(&self) -> Box<dyn IssueTransformer> {
290        Box::new(self.clone())
291    }
292}
293
#[cfg(test)]
mod test {
    use super::*;
    use itertools::Itertools;
    use qlty_test_utilities::git::sample_repo;
    use std::fs;
    use std::path::PathBuf;

    /// Untracked files are reported and indexed, while files excluded by a
    /// nested `.gitignore` are left out of both the changed-file list and the
    /// line index.
    #[test]
    fn test_changed_files_respects_gitignore() -> Result<()> {
        let (td, repo) = sample_repo();

        // new_file.me
        // .foo/
        // ├── .gitignore  # contains "bar"
        // ├── see.me      # contains some random content
        // └── bar/
        //     └── ignore.me  # contains content

        fs::write(
            td.path().join("new_file.me"),
            "This is some random content.",
        )
        .unwrap();
        let foo_path = td.path().join(".foo");
        fs::create_dir(&foo_path).unwrap();
        fs::write(foo_path.join(".gitignore"), "bar").unwrap();
        fs::write(foo_path.join("see.me"), "This is some random content.").unwrap();
        let bar_path = foo_path.join("bar");
        fs::create_dir(&bar_path).unwrap();
        fs::write(
            bar_path.join("ignore.me"),
            "This file should be ignored according to .gitignore.",
        )
        .unwrap();

        let git_diff = GitDiff::compute(DiffMode::HeadToWorkdir, repo.path())?;
        let paths = git_diff.changed_files;

        let expected_paths = [
            PathBuf::from(".foo/.gitignore"),
            PathBuf::from(".foo/see.me"),
            PathBuf::from("new_file.me"),
        ];

        assert_eq!(
            paths.iter().cloned().sorted().collect::<Vec<PathBuf>>(),
            expected_paths
        );

        let index = &git_diff.line_filter.index;

        // test new file
        assert!(index.matches_line_range(&PathBuf::from("new_file.me"), 1..=1));

        // test new file in a folder
        assert!(index.matches_line_range(&PathBuf::from(".foo/see.me"), 1..=1));

        // test new ignore file in a folder
        assert!(!index.matches_line_range(&PathBuf::from(".foo/bar/ignore.me"), 1..=1));

        Ok(())
    }
}
373}