1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
use crate::advisory::Date;
use crate::error::{Error, ErrorKind};
use gix::date::Time;
use std::{
cmp::{max, min},
collections::HashMap,
path::PathBuf,
};
use tame_index::external::gix;
use super::GitPath;
/// Tracks the time of latest modification of files in git,
/// as well as the time each file first appeared in the walked history.
///
/// Both tables are filled once by [`GitModificationTimes::new`] and are only read afterwards.
#[cfg_attr(docsrs, doc(cfg(feature = "osv-export")))]
pub struct GitModificationTimes {
/// Newest commit time seen for each path among the commits that touched it.
mtimes: HashMap<PathBuf, Time>,
/// Oldest commit time seen for each path among the commits that touched it.
ctimes: HashMap<PathBuf, Time>,
}
impl GitModificationTimes {
/// Performance: collects all modification times on creation
/// and caches them. This is more efficient for looking up lots of files,
/// but wasteful if you just need to look up a couple files.
pub fn new(repo: &super::Repository) -> Result<Self, Error> {
use gix::{bstr::ByteVec, diff::tree::recorder::Change, prelude::Find};
// Sadly I had to hand-roll this; there is no good off-the-shelf impl.
// libgit2 has had a feature request for this for over a decade:
// https://github.com/libgit2/libgit2/issues/495
// as does git2-rs: https://github.com/rust-lang/git2-rs/issues/588
// To make sure this works I've verified it against a naive shell script using `git log`
// as well as `git whatchanged`
let mut mtimes: HashMap<PathBuf, Time> = HashMap::new();
let mut ctimes: HashMap<PathBuf, Time> = HashMap::new();
let repo = &repo.repo;
let walk = repo
.rev_walk(Some(repo.head_id().map_err(|err| {
format_err!(ErrorKind::Repo, "unable to find head id: {}", err)
})?))
.sorting(gix::traverse::commit::simple::Sorting::ByCommitTimeNewestFirst)
.all()
.map_err(|err| format_err!(ErrorKind::Repo, "unable to walk commits: {}", err))?;
let db = &repo.objects;
let mut buf = Vec::new();
let mut buf2 = Vec::new();
for info in walk {
let info = info.map_err(|err| {
format_err!(ErrorKind::Repo, "failed to retrieve commit info: {}", err)
})?;
let parent_commit_id = match info.parent_ids.len() {
1 => Some(info.parent_ids[0]), // Diff with the previous commit
0 => None, // We've found the initial commit, diff with empty repo
_ => continue, // Ignore merge commits (2+ parents) because that's what 'git whatchanged' does.
};
buf.clear();
buf2.clear();
let (main_tree_id, file_mod_time) = {
let commit = db
.try_find(&info.id, &mut buf)
.map_err(|err| {
format_err!(
ErrorKind::Repo,
"failed to find commit '{}': {}",
info.id,
err
)
})?
.ok_or_else(|| {
format_err!(ErrorKind::Repo, "commit '{}' not present", info.id)
})?
.decode()
.map_err(|err| {
format_err!(
ErrorKind::Repo,
"unable to decode commit '{}': {}",
info.id,
err
)
})?
.into_commit()
.expect("id is actually a commit");
(commit.tree(), commit.time())
};
let current_tree = db
.try_find(&main_tree_id, &mut buf)
.map_err(|err| {
format_err!(
ErrorKind::Repo,
"failed to find tree for commit '{}': {}",
info.id,
err
)
})?
.expect("main tree present")
.try_into_tree_iter()
.expect("id to be a tree");
let previous_tree: Option<_> = {
parent_commit_id
.and_then(|id| db.try_find(&id, &mut buf2).ok().flatten())
.and_then(|c| c.decode().ok())
.and_then(gix::objs::ObjectRef::into_commit)
.map(|c| c.tree())
.and_then(|tree| db.try_find(&tree, &mut buf2).ok().flatten())
.and_then(|tree| tree.try_into_tree_iter())
};
let mut recorder = gix::diff::tree::Recorder::default();
gix::diff::tree::Changes::from(previous_tree)
.needed_to_obtain(
current_tree,
&mut gix::diff::tree::State::default(),
db,
&mut recorder,
)
.map_err(|err| {
format_err!(
ErrorKind::Repo,
"failed to diff commit {} to its parent {:?}: {}",
info.id,
parent_commit_id,
err
)
})?;
for diff in recorder.records {
// AFAIK files should never be deleted from an advisory db,
// though unsure how moves/renames are handled by the recorder
let file_path = match diff {
Change::Addition { path, .. }
| Change::Modification { path, .. }
| Change::Deletion { path, .. } => {
Vec::from(path).into_path_buf().expect("non utf-8 path")
}
};
mtimes
.entry(file_path.clone())
.and_modify(|t| *t = max(*t, file_mod_time))
.or_insert(file_mod_time);
ctimes
.entry(file_path)
.and_modify(|t| *t = min(*t, file_mod_time))
.or_insert(file_mod_time);
}
}
Ok(Self { mtimes, ctimes })
}
/// Returns the cached Git modification time of the file at `path`.
/// The path must be relative to the root of the repository.
///
/// # Panics
///
/// Panics if no modification time is recorded for `path`.
pub fn for_path(&self, path: GitPath<'_>) -> Time {
    let recorded = self.mtimes.get(path.path()).copied();
    recorded.unwrap()
}
/// Looks up the Git modification date for a given file path.
/// The path must be relative to the root of the repository.
///
/// This is the time returned by [`Self::for_path`], converted to a [`Date`].
///
/// # Panics
///
/// Panics if no modification time is recorded for `path`.
pub fn mdate_for_path(&self, path: GitPath<'_>) -> Date {
    // Delegate the lookup so both modification-time accessors share one code path.
    Self::gix_time_to_date(&self.for_path(path))
}
/// Returns the cached Git creation date of the file at `path`.
/// The path must be relative to the root of the repository.
///
/// # Panics
///
/// Panics if no creation time is recorded for `path`.
pub fn cdate_for_path(&self, path: GitPath<'_>) -> Date {
    let created = self.ctimes.get(path.path()).unwrap();
    Self::gix_time_to_date(created)
}
/// Converts a gix timestamp into a [`Date`] by rendering it as a zero-padded
/// `YYYY-MM-DD` string and parsing that string.
///
/// # Panics
///
/// Panics if the rendered string fails to parse as a [`Date`].
fn gix_time_to_date(timestamp: &Time) -> Date {
    let calendar_day = crate::repository::git::gix_time_to_time(*timestamp).date();
    let month_number = u8::from(calendar_day.month());
    let rendered = format!(
        "{:0>4}-{:0>2}-{:0>2}",
        calendar_day.year(),
        month_number,
        calendar_day.day()
    );
    rendered.parse().unwrap()
}
}