1#![warn(clippy::all)]
2use crate::git_file_future::{FileNameChange, GitFileFutureRegistry};
3use failure::Error;
4use git2::Revwalk;
5use git2::{Commit, Delta, DiffDelta, ObjectType, Odb, Oid, Patch, Repository, Tree};
6use regex::Regex;
7use serde::Serialize;
8use std::cell::RefCell;
9use std::path::{Path, PathBuf};
10use std::rc::Rc;
11use std::time::{Duration, SystemTime};
12
/// Options controlling which commits and file changes a [`GitLog`] reports.
///
/// Built fluently: start from `GitLogConfig::default()` and chain the
/// by-value setters, each of which returns an updated copy.
#[derive(Debug, Clone, Copy)]
pub struct GitLogConfig {
    /// When `false`, commits with more than one parent report no file changes.
    include_merges: bool,
    /// Earliest commit time to report, in seconds since the Unix epoch.
    earliest_time: u64,
}

impl Default for GitLogConfig {
    /// Merge commit changes are excluded and there is no lower time bound.
    fn default() -> GitLogConfig {
        GitLogConfig {
            include_merges: false,
            earliest_time: 0,
        }
    }
}

impl GitLogConfig {
    /// Returns a copy of this config with `include_merges` replaced.
    // NOTE(review): the allow suggests this is only called from tests in some
    // build configurations - confirm before removing it.
    #[allow(dead_code)]
    pub fn include_merges(self, include_merges: bool) -> GitLogConfig {
        GitLogConfig {
            include_merges,
            ..self
        }
    }

    /// Returns a copy of this config that only reports commits at or after
    /// `earliest_time` (seconds since the Unix epoch).
    pub fn since(self, earliest_time: u64) -> GitLogConfig {
        GitLogConfig {
            earliest_time,
            ..self
        }
    }

    /// Returns a copy of this config that only reports commits from roughly
    /// the last `years` years, where a year is counted as exactly 365 days.
    ///
    /// Never panics: if the computed start time cannot be represented, or lies
    /// before the Unix epoch, the lower bound falls back to `0` (no limit).
    /// Negative or NaN `years` are treated as zero, i.e. "since now".
    pub fn since_years(self, years: f64) -> GitLogConfig {
        const SECS_PER_YEAR: f64 = 365.0 * 24.0 * 60.0 * 60.0;
        // Float-to-integer `as` casts saturate: negative/NaN become 0 and
        // values too large for u64 become u64::MAX.
        let secs_ago = (SECS_PER_YEAR * years) as u64;
        let earliest_time = SystemTime::now()
            .checked_sub(Duration::from_secs(secs_ago))
            .and_then(|start| start.duration_since(SystemTime::UNIX_EPOCH).ok())
            .map_or(0, |since_epoch| since_epoch.as_secs());
        self.since(earliest_time)
    }
}
52
53pub struct GitLog {
54 workdir: PathBuf,
56 repo: Repository,
57 config: GitLogConfig,
58}
59
60pub struct GitLogIterator<'a> {
61 git_log: &'a GitLog,
62 odb: Odb<'a>,
63 revwalk: Revwalk<'a>,
64 git_file_future_registry: Rc<RefCell<GitFileFutureRegistry>>,
66}
67
68#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Serialize)]
71pub struct User {
72 name: Option<String>,
73 email: Option<String>,
74}
75
76impl User {
77 pub fn new(name: Option<&str>, email: Option<&str>) -> User {
78 User {
79 name: name.map(|x| x.to_owned()),
80 email: email.map(|x| x.to_owned()),
81 }
82 }
83}
84
85#[derive(Debug, Serialize, Clone, Getters)]
87pub struct GitLogEntry {
88 id: String,
89 summary: String,
90 parents: Vec<String>,
91 committer: User,
92 commit_time: u64,
93 author: User,
94 author_time: u64,
95 co_authors: Vec<User>,
96 file_changes: Vec<FileChange>,
97}
98
99#[derive(Debug, Serialize, Clone, PartialEq)]
101pub enum CommitChange {
102 Add,
103 Rename,
104 Delete,
105 Modify,
106 Copied,
107}
108
109#[derive(Debug, Serialize, Clone, Getters)]
111pub struct FileChange {
112 file: PathBuf,
113 old_file: Option<PathBuf>,
114 change: CommitChange,
115 lines_added: u64,
116 lines_deleted: u64,
117}
118
119impl GitLog {
120 pub fn workdir(&self) -> &Path {
121 &self.workdir
122 }
123
124 pub fn new(start_dir: &Path, config: GitLogConfig) -> Result<GitLog, Error> {
125 let repo = Repository::discover(start_dir)?;
126
127 let workdir = repo
128 .workdir()
129 .ok_or_else(|| format_err!("bare repository - no workdir"))?
130 .canonicalize()?;
131
132 debug!("work dir: {:?}", workdir);
133
134 Ok(GitLog {
135 workdir,
136 repo,
137 config,
138 })
139 }
140
141 pub fn iterator(&self) -> Result<GitLogIterator, Error> {
142 let odb = self.repo.odb()?;
143 let mut revwalk = self.repo.revwalk()?;
144 revwalk.set_sorting(git2::Sort::TOPOLOGICAL)?;
145 revwalk.push_head()?;
146 Ok(GitLogIterator {
147 git_log: &self,
148 odb,
149 revwalk,
150 git_file_future_registry: Rc::new(RefCell::new(GitFileFutureRegistry::new())),
151 })
152 }
153}
154
155impl<'a> Iterator for GitLogIterator<'a> {
156 type Item = Result<GitLogEntry, Error>;
157
158 fn next(&mut self) -> Option<Self::Item> {
159 let mut next_item = self.revwalk.next();
160 while next_item.is_some() {
161 let c = self.summarise_commit(next_item.unwrap());
162 match c {
163 Ok(Some(c)) => {
164 if c.commit_time >= self.git_log.config.earliest_time {
165 self.register_file_futures(&c);
166 return Some(Ok(c));
167 } else {
168 return None; }
170 }
171 Ok(None) => {}
172 Err(e) => return Some(Err(e)),
173 };
174 next_item = self.revwalk.next();
175 }
176 None
177 }
178}
179
180impl<'a> GitLogIterator<'a> {
181 pub fn git_file_future_registry(&self) -> Rc<RefCell<GitFileFutureRegistry>> {
182 self.git_file_future_registry.clone()
183 }
184
185 fn register_file_futures(&mut self, entry: &GitLogEntry) {
187 let parents: Vec<Oid> = entry
189 .parents
190 .iter()
191 .map(|id| Oid::from_str(&id).unwrap())
192 .collect();
193 let mut file_changes: Vec<(PathBuf, FileNameChange)> = Vec::new();
194 for file_change in &entry.file_changes {
195 match file_change.change {
196 CommitChange::Rename => {
197 let old_name = file_change.old_file.as_ref().unwrap().clone();
198 let new_name = file_change.file.clone();
199 file_changes.push((old_name, FileNameChange::Renamed(new_name)))
200 }
201 CommitChange::Delete => {
202 let name = file_change.file.clone();
203 file_changes.push((name, FileNameChange::Deleted()))
204 }
205 _ => (),
206 }
207 }
208 self.git_file_future_registry.borrow_mut().register(
209 &Oid::from_str(&entry.id).unwrap(),
210 &parents,
211 &file_changes,
212 );
213 }
214
215 fn summarise_commit(
218 &self,
219 oid: Result<Oid, git2::Error>,
220 ) -> Result<Option<GitLogEntry>, Error> {
221 let oid = oid?;
222 let kind = self.odb.read(oid)?.kind();
223 match kind {
224 ObjectType::Commit => {
225 let commit = self.git_log.repo.find_commit(oid)?;
226 debug!("processing {:?}", commit);
227 let author = commit.author();
228 let committer = commit.committer();
229 let author_time = author.when().seconds() as u64;
230 let commit_time = committer.when().seconds() as u64;
231 let other_time = commit.time().seconds() as u64;
232 if commit_time != other_time {
233 error!(
234 "Commit {:?} time {:?} != commit time {:?}",
235 commit, other_time, commit_time
236 );
237 }
238 let co_authors = if let Some(message) = commit.message() {
239 find_coauthors(message)
240 } else {
241 Vec::new()
242 };
243
244 let commit_tree = commit.tree()?;
245 let file_changes = commit_file_changes(
246 &self.git_log.repo,
247 &commit,
248 &commit_tree,
249 self.git_log.config,
250 );
251 Ok(Some(GitLogEntry {
252 id: oid.to_string(),
253 summary: commit.summary().unwrap_or("[no message]").to_string(),
254 parents: commit.parent_ids().map(|p| p.to_string()).collect(),
255 committer: signature_to_user(&committer),
256 commit_time,
257 author: signature_to_user(&author),
258 author_time,
259 co_authors,
260 file_changes,
261 }))
262 }
263 _ => {
264 info!("ignoring object type: {}", kind);
265 Ok(None)
266 }
267 }
268 }
269}
270
271fn signature_to_user(signature: &git2::Signature) -> User {
272 User {
273 name: signature.name().map(|x| x.to_owned()),
274 email: signature.email().map(|x| x.to_owned()),
275 }
276}
277
/// Trims surrounding whitespace from `s`, returning `None` when nothing is
/// left.
fn trim_string(s: &str) -> Option<&str> {
    Some(s.trim()).filter(|trimmed| !trimmed.is_empty())
}
286
287fn find_coauthors(message: &str) -> Vec<User> {
288 lazy_static! {
289 static ref CO_AUTH_LINE: Regex = Regex::new(r"(?m)^\s*Co-authored-by:(.*)$").unwrap();
290 static ref CO_AUTH_ANGLE_BRACKETS: Regex = Regex::new(r"^(.*)<([^>]+)>$").unwrap();
291 }
292
293 CO_AUTH_LINE
294 .captures_iter(message)
295 .map(|capture_group| {
296 let co_author_text = &capture_group[1];
297 if let Some(co_author_bits) = CO_AUTH_ANGLE_BRACKETS.captures(co_author_text) {
298 User::new(
299 trim_string(&co_author_bits.get(1).unwrap().as_str()),
300 trim_string(co_author_bits.get(2).unwrap().as_str()),
301 )
302 } else if co_author_text.contains('@') {
303 User::new(None, trim_string(co_author_text))
305 } else {
306 User::new(trim_string(co_author_text), None)
307 }
308 })
309 .collect()
310}
311
312fn commit_file_changes(
313 repo: &Repository,
314 commit: &Commit,
315 commit_tree: &Tree,
316 config: GitLogConfig,
317) -> Vec<FileChange> {
318 if commit.parent_count() == 0 {
319 info!("Commit {} has no parent", commit.id());
320
321 scan_diffs(&repo, &commit_tree, None, &commit, None).expect("Can't scan for diffs")
322 } else if commit.parent_count() > 1 && !config.include_merges {
323 debug!(
324 "Not showing file changes for merge commit {:?}",
325 commit.id()
326 );
327 Vec::new()
328 } else {
329 commit
330 .parents()
331 .flat_map(|parent| {
332 debug!("Getting changes for parent {:?}:", parent);
333 let parent_tree = parent.tree().expect("can't get parent tree");
334 scan_diffs(
335 &repo,
336 &commit_tree,
337 Some(&parent_tree),
338 &commit,
339 Some(&parent),
340 )
341 .expect("Can't scan for diffs")
342 })
343 .collect()
344 }
345}
346
347fn scan_diffs(
348 repo: &Repository,
349 commit_tree: &Tree,
350 parent_tree: Option<&Tree>,
351 commit: &Commit,
352 parent: Option<&Commit>,
353) -> Result<Vec<FileChange>, Error> {
354 let mut diff = repo.diff_tree_to_tree(parent_tree, Some(&commit_tree), None)?;
355 diff.find_similar(None)?;
357 let file_changes = diff
358 .deltas()
359 .enumerate()
360 .filter_map(|(delta_index, delta)| {
361 let patch =
363 Patch::from_diff(&diff, delta_index).expect("can't get a patch from a diff");
364 let (_, lines_added, lines_deleted) = if let Some(patch) = patch {
365 patch
366 .line_stats()
367 .expect("Couldn't get line stats from a patch")
368 } else {
369 warn!("No patch possible diffing {:?} -> {:?}", commit, parent);
370 (0, 0, 0)
371 };
372 summarise_delta(delta, lines_added as u64, lines_deleted as u64)
373 });
374 Ok(file_changes.collect())
375}
376
377fn summarise_delta(delta: DiffDelta, lines_added: u64, lines_deleted: u64) -> Option<FileChange> {
378 match delta.status() {
379 Delta::Added => {
380 let name = delta.new_file().path().unwrap();
381 Some(FileChange {
382 file: name.to_path_buf(),
383 old_file: None,
384 change: CommitChange::Add,
385 lines_added,
386 lines_deleted,
387 })
388 }
389 Delta::Renamed => {
390 let old_name = delta.old_file().path().unwrap();
391 let new_name = delta.new_file().path().unwrap();
392 Some(FileChange {
393 file: new_name.to_path_buf(),
394 old_file: Some(old_name.to_path_buf()),
395 change: CommitChange::Rename,
396 lines_added,
397 lines_deleted,
398 })
399 }
400 Delta::Deleted => {
401 let name = delta.old_file().path().unwrap();
402 Some(FileChange {
403 file: name.to_path_buf(),
404 old_file: None,
405 change: CommitChange::Delete,
406 lines_added,
407 lines_deleted,
408 })
409 }
410 Delta::Modified => {
411 let name = delta.new_file().path().unwrap();
412 Some(FileChange {
413 file: name.to_path_buf(),
414 old_file: None,
415 change: CommitChange::Modify,
416 lines_added,
417 lines_deleted,
418 })
419 }
420 Delta::Copied => {
421 let old_name = delta.old_file().path().unwrap();
422 let new_name = delta.new_file().path().unwrap();
423 Some(FileChange {
424 file: new_name.to_path_buf(),
425 old_file: Some(old_name.to_path_buf()),
426 change: CommitChange::Copied,
427 lines_added,
428 lines_deleted,
429 })
430 }
431 _ => {
432 error!("Not able to handle delta of status {:?}", delta.status());
433 None
434 }
435 }
436}
437
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use tempfile::tempdir;
    use test_shared::*;

    /// Asserts that walking the whole of `git_log` yields no `Err` items.
    fn assert_no_errors(git_log: &GitLog) -> Result<(), Error> {
        let err_count = git_log.iterator()?.filter(Result::is_err).count();
        assert_eq!(err_count, 0);
        Ok(())
    }

    #[test]
    fn authorless_message_has_no_coauthors() {
        assert_eq!(find_coauthors("do be do be do"), Vec::<User>::new());
    }

    #[test]
    fn can_get_coauthors_from_message() {
        let message = r#"This is a commit message
            not valid: Co-authored-by: fred jones
            Co-authored-by: valid user <valid@thing.com>
            Co-authored-by: <be.lenient@any-domain.com>
            Co-authored-by: bad@user <this isn't really trying to be clever>
            ignore random lines
            Co-authored-by: if there's no at it's a name
            Co-authored-by: if there's an @ it's email@thing.com
            ignore trailing lines
        "#;

        let expected = vec![
            User::new(Some("valid user"), Some("valid@thing.com")),
            User::new(None, Some("be.lenient@any-domain.com")),
            User::new(
                Some("bad@user"),
                Some("this isn't really trying to be clever"),
            ),
            User::new(Some("if there's no at it's a name"), None),
            User::new(None, Some("if there's an @ it's email@thing.com")),
        ];

        assert_eq!(find_coauthors(message), expected);
    }

    #[test]
    fn can_extract_basic_git_log() -> Result<(), Error> {
        let gitdir = tempdir()?;
        let git_root = unzip_git_sample("git_sample", gitdir.path())?;
        let git_log = GitLog::new(&git_root, GitLogConfig::default())?;

        assert_eq!(git_log.workdir.canonicalize()?, git_root.canonicalize()?);

        assert_no_errors(&git_log)?;

        let entries: Vec<_> = git_log.iterator()?.filter_map(Result::ok).collect();

        assert_eq_json_file(&entries, "./tests/expected/git/git_sample.json");

        Ok(())
    }

    #[test]
    fn git_log_can_include_merge_changes() -> Result<(), Error> {
        let gitdir = tempdir()?;
        let git_root = unzip_git_sample("git_sample", gitdir.path())?;

        let git_log = GitLog::new(&git_root, GitLogConfig::default().include_merges(true))?;

        assert_no_errors(&git_log)?;

        let entries: Vec<_> = git_log.iterator()?.filter_map(Result::ok).collect();

        assert_eq_json_file(&entries, "./tests/expected/git/git_sample_with_merges.json");

        Ok(())
    }

    // The timestamps below are literal epoch seconds from the sample repository.
    #[allow(clippy::unreadable_literal)]
    #[test]
    fn git_log_can_limit_to_recent_history() -> Result<(), Error> {
        let gitdir = tempdir()?;
        let git_root = unzip_git_sample("git_sample", gitdir.path())?;

        let git_log = GitLog::new(&git_root, GitLogConfig::default().since(1558521694))?;

        assert_no_errors(&git_log)?;

        let ids: Vec<_> = git_log
            .iterator()?
            .filter_map(Result::ok)
            .map(|h| (h.summary.clone(), h.commit_time))
            .collect();
        assert_eq!(
            ids,
            vec![
                ("renaming".to_owned(), 1558533240u64),
                ("just changed parent.clj".to_owned(), 1558524371u64),
                ("Merge branch \'fiddling\'".to_owned(), 1558521695u64)
            ]
        );

        Ok(())
    }

    #[test]
    fn git_log_tracks_renames() -> Result<(), Error> {
        let gitdir = tempdir()?;
        let git_root = unzip_git_sample("rename_simple", gitdir.path())?;

        let git_log = GitLog::new(&git_root, GitLogConfig::default())?;

        assert_no_errors(&git_log)?;

        // Sort oldest first so the expectations read in chronological order.
        let mut entries: Vec<_> = git_log.iterator()?.filter_map(Result::ok).collect();
        entries.sort_by(|a, b| a.author_time.cmp(&b.author_time));

        let changes: Vec<String> = entries
            .iter()
            .map(|entry| entry.summary.to_owned())
            .collect();

        assert_eq!(
            changes,
            vec![
                "initial commit",
                "unrelated commit",
                "moving a to c",
                "moving and renaming"
            ]
        );

        let file_changes: Vec<Vec<FileChange>> = entries
            .iter()
            .map(|entry| {
                let mut entries = entry.file_changes.clone();
                entries.sort_by(|a, b| a.file.cmp(&b.file));
                entries
            })
            .collect();

        assert_eq_json_value(
            &file_changes,
            &json!([
                [{"change":"Add",
                  "file":"a.txt",
                  "lines_added": 4,
                  "lines_deleted": 0,
                  "old_file": null}
                ],
                [{"change":"Add",
                  "file":"b.txt",
                  "lines_added": 1,
                  "lines_deleted": 0,
                  "old_file": null}
                ],
                [{"change":"Rename",
                  "file":"c.txt",
                  "lines_added": 0,
                  "lines_deleted": 0,
                  "old_file": "a.txt"}
                ],
                [{"change":"Rename",
                  "file":"d.txt",
                  "lines_added": 1,
                  "lines_deleted": 0,
                  "old_file": "c.txt"}
                ]
            ]
            ),
        );

        Ok(())
    }
}
612