git_scanner/
git.rs

1#![warn(clippy::all)]
2#![allow(dead_code)]
3#![allow(unused_imports)]
4
5use crate::git_file_history::{FileHistoryEntry, FileHistoryEntryBuilder, GitFileHistory};
6use crate::git_logger::{CommitChange, GitLog, GitLogConfig, User};
7use crate::git_user_dictionary::GitUserDictionary;
8use crate::indicator_calculator::IndicatorCalculator;
9use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
10use failure::Error;
11use git2::Status;
12use serde::{Deserialize, Serialize, Serializer};
13use std::cell::RefCell;
14use std::cmp::Ordering;
15use std::collections::HashMap;
16use std::collections::HashSet;
17use std::iter::once;
18use std::iter::FromIterator;
19use std::path::Path;
20use std::path::PathBuf;
21
22use git2::Repository;
23use serde_json::{json, Value};
24
/// Summary of the git history of a single file.
///
/// Built by `GitHistories::stats_from_history` and serialized to JSON by
/// `GitCalculator::calculate`.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct GitData {
    /// Latest commit time found among the file's history entries.
    last_update: u64,
    /// Whole days (86400-second units) between the repository's last commit and `last_update`.
    age_in_days: u64,
    /// Earliest author time of an `Add` change.
    /// We only have a creation date if there was an Add change in the dates scanned,
    /// and it is discarded if some other change has an earlier author time.
    creation_date: Option<u64>,
    /// Number of entries in `users`.
    user_count: usize,
    /// Sorted user dictionary IDs of everyone who changed the file.
    users: Vec<usize>, // dictionary IDs
    /// Per-day breakdown sorted by day; `None` unless detailed output was requested.
    details: Option<Vec<GitDetails>>,
}
36
/// Git information for a given day, summarized.
///
/// We don't distinguish multiple changes in a day currently, so if one person changed 1 line
/// and another changed 100 you can't tell the difference.
/// It is assumed that people work as teams to some degree!
/// This could be revisited if needed, but I'm trying to keep the log size sane.
///
/// Also dates are summarized by "author date" - had to pick author or commit date, and
/// author dates seem more reliable.  But it's named "commit_day" as that's more understandable.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct GitDetails {
    /// Start of the day the changes were authored on, as computed by `start_of_day`.
    /// Note this is based on "author date" - commit dates can be all over the place with PRs, rebasing and the like.
    pub commit_day: u64,
    /// User dictionary IDs of everyone involved in that day's changes.
    /// Serialized in ascending order (see `ordered_set`) so the output is stable.
    #[serde(serialize_with = "ordered_set")]
    pub users: HashSet<usize>, // dictionary IDs
    /// Number of history entries recorded for this file on that day.
    pub commits: u64,
    /// Sum of `lines_added` over that day's history entries.
    pub lines_added: u64,
    /// Sum of `lines_deleted` over that day's history entries.
    pub lines_deleted: u64,
}
53
54impl Ord for GitDetails {
55    fn cmp(&self, other: &Self) -> Ordering {
56        self.commit_day.cmp(&other.commit_day)
57    }
58}
59
60impl PartialOrd for GitDetails {
61    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
62        Some(self.cmp(other))
63    }
64}
65
66fn ordered_set<S>(value: &HashSet<usize>, serializer: S) -> Result<S::Ok, S::Error>
67where
68    S: Serializer,
69{
70    let mut ordered: Vec<&usize> = value.iter().collect();
71    ordered.sort();
72    ordered.serialize(serializer)
73}
74
/// History of any git roots discovered by the calculator.
///
/// Split from GitCalculator as we need to mutate the dictionary while borrowing the history immutably.
#[derive(Debug)]
pub struct GitHistories {
    /// Histories loaded so far; `add_history_for` appends one each time it is called.
    git_file_histories: Vec<GitFileHistory>,
    /// config used to initialize any git histories
    git_log_config: GitLogConfig,
}
83
/// Indicator calculator named "git".
///
/// For files it reports statistics derived from the file's git history; for directories
/// that directly contain a `.git` directory it reports basic repository information.
#[derive(Debug)]
pub struct GitCalculator {
    /// Git histories loaded so far, plus the config used to load new ones.
    histories: GitHistories,
    /// Whether per-day details are included in each file's statistics.
    detailed: bool,
    /// Dictionary of users seen so far; file statistics refer to users by its IDs and
    /// the whole dictionary is emitted as metadata under the "users" key.
    dictionary: GitUserDictionary,
}
90
/// Basic information about a git repository, reported for repository root directories.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct GitInfo {
    /// URL of the remote named "origin"; `None` if the remote could not be fetched or has no usable URL.
    pub remote_url: Option<String>,
    /// Id of the commit that HEAD resolves to, as a string; `None` if it could not be determined.
    pub head: Option<String>,
}
96
97fn repository_head(repository: &Repository) -> Result<String, Error> {
98    let head = repository.head()?;
99    let head_ref = head.resolve()?;
100    Ok(head_ref.peel_to_commit()?.id().to_string())
101}
102
103impl GitInfo {
104    pub fn new(path: &Path, repository: Repository) -> Self {
105        let remote = repository.find_remote("origin");
106        let remote_url = match remote {
107            Err(e) => {
108                warn!("Error fetching origin for {:?}: {}", path, e);
109                None
110            }
111            Ok(remote) => remote.url().map(str::to_owned),
112        };
113        let head = match repository_head(&repository) {
114            Err(e) => {
115                warn!("Error fetching head for {:?}: {}", path, e);
116                None
117            }
118            Ok(head) => Some(head),
119        };
120        GitInfo { remote_url, head }
121    }
122}
123
124fn append_unique_users(users: &mut Vec<User>, new_users: HashSet<&User>) {
125    let new_users_cloned = new_users.into_iter().cloned();
126    let old_users: HashSet<User> = users.drain(..).chain(new_users_cloned).collect();
127    let mut all_users: Vec<User> = old_users.into_iter().collect();
128
129    users.append(&mut all_users);
130}
/// Rounds a unix timestamp (seconds since the epoch) down to midnight UTC of the same day.
///
/// This is plain integer arithmetic, so unlike a calendar-library round trip it cannot
/// panic on timestamps outside the library's supported date range.
///
/// The input is reinterpreted as a signed number of seconds before rounding, which is
/// what this function has always done: a timestamp that was negative (before 1970)
/// prior to being stored in a `u64` still rounds down to its own midnight and is
/// returned in the same wrapped representation.
fn start_of_day(secs_since_epoch: u64) -> u64 {
    const SECONDS_PER_DAY: i64 = 60 * 60 * 24;
    // Deliberate bit-reinterpretation, not a range conversion (see doc comment).
    let secs = secs_since_epoch as i64;
    // rem_euclid is always in 0..SECONDS_PER_DAY, so this floors towards negative
    // infinity; wrapping_sub only matters within a day of i64::MIN.
    secs.wrapping_sub(secs.rem_euclid(SECONDS_PER_DAY)) as u64
}
138impl GitHistories {
139    fn git_history(&self, filename: &Path) -> Option<&GitFileHistory> {
140        self.git_file_histories
141            .iter()
142            .find(|h| h.is_repo_for(filename).unwrap())
143        // TODO can we get rid of unwrap here?
144        // it's tricky as we can't return a Result.
145    }
146
147    fn add_history_for(&mut self, filename: &Path) -> Result<(), Error> {
148        info!("Adding new git log for {:?}", &filename);
149        let mut git_log = GitLog::new(filename, self.git_log_config)?;
150        info!("Found working dir: {:?}", git_log.workdir());
151        let history = GitFileHistory::new(&mut git_log)?;
152        self.git_file_histories.push(history);
153        Ok(())
154    }
155    fn unique_changers(
156        history: &FileHistoryEntry,
157        dictionary: &mut GitUserDictionary,
158    ) -> HashSet<usize> {
159        let mut users: Vec<&User> = history
160            .co_authors
161            .iter()
162            .chain(once(&history.author))
163            .chain(once(&history.committer))
164            .collect();
165        users.sort();
166        users.dedup();
167        // this used to use a HashSet but I want deterministic ordering and so I want it in a vec anyway
168        users.into_iter().map(|u| dictionary.register(u)).collect()
169    }
170
171    fn stats_from_history(
172        &self,
173        dictionary: &mut GitUserDictionary,
174        last_commit: u64,
175        history: &[FileHistoryEntry],
176        detailed: bool,
177    ) -> Option<GitData> {
178        // for now, just get latest change - maybe non-trivial change? (i.e. ignore rename/copy) - or this could be configurable
179        // and get set of all authors - maybe deduplicate by email.
180        if history.is_empty() {
181            return None;
182        }
183        let mut details: HashMap<u64, GitDetails> = HashMap::new();
184
185        let first_date = history.iter().map(|h| h.author_time).min();
186
187        let mut creation_date = history
188            .iter()
189            .filter(|h| h.change == CommitChange::Add)
190            .map(|h| h.author_time)
191            .min();
192
193        if let Some(creation) = creation_date {
194            // TODO: test this!
195            if first_date.unwrap() < creation {
196                debug!(
197                    "File has a git date {:?} before the first Add operation {:?}",
198                    first_date.unwrap(),
199                    creation
200                );
201                creation_date = None;
202            }
203        }
204
205        let last_update = history.iter().map(|h| h.commit_time).max()?;
206
207        let age_in_days = (last_commit - last_update) / (60 * 60 * 24);
208
209        let changers: HashSet<usize> = history
210            .iter()
211            .flat_map(|h| GitHistories::unique_changers(h, dictionary))
212            .collect();
213
214        for entry in history {
215            let author_day = start_of_day(entry.author_time);
216
217            let daily_details = details.entry(author_day).or_insert(GitDetails {
218                commit_day: author_day,
219                users: HashSet::new(),
220                commits: 0,
221                lines_added: 0,
222                lines_deleted: 0,
223            });
224            daily_details.commits += 1;
225            daily_details
226                .users
227                .extend(GitHistories::unique_changers(entry, dictionary).into_iter());
228            daily_details.lines_added += entry.lines_added;
229            daily_details.lines_deleted += entry.lines_deleted;
230        }
231
232        let mut changer_list: Vec<usize> = changers.into_iter().collect();
233        changer_list.sort();
234
235        let mut details_vec: Vec<GitDetails> = details
236            .into_iter()
237            .map(|(_k, v)| v)
238            .collect::<Vec<GitDetails>>();
239        details_vec.sort();
240
241        Some(GitData {
242            last_update,
243            age_in_days,
244            creation_date,
245            user_count: changer_list.len(),
246            users: changer_list,
247            details: if detailed {
248                Some(details_vec)
249            } else {
250                None // TODO: don't waste time processing details if we don't want them!
251            },
252        })
253    }
254}
255
256impl GitCalculator {
257    pub fn new(config: GitLogConfig, detailed: bool) -> Self {
258        GitCalculator {
259            histories: GitHistories {
260                git_file_histories: Vec::new(),
261                git_log_config: config,
262            },
263            detailed,
264            dictionary: GitUserDictionary::new(),
265        }
266    }
267}
268
269impl IndicatorCalculator for GitCalculator {
270    fn name(&self) -> String {
271        "git".to_string()
272    }
273    fn calculate(&mut self, path: &Path) -> Result<Option<serde_json::Value>, Error> {
274        if path.is_file() {
275            // TODO: refactor this into a method on histories (I tried this but got into a mess with mutable and immutable refs to self!)
276            let history = match self.histories.git_history(path) {
277                Some(history) => history,
278                None => {
279                    info!("Loading git history for {}", path.display());
280                    self.histories.add_history_for(path)?;
281                    info!("history loaded.");
282                    self.histories.git_history(path).unwrap()
283                }
284            };
285            let last_commit = history.last_commit();
286            let file_history = history.history_for(path)?;
287
288            if let Some(file_history) = file_history {
289                let stats = self.histories.stats_from_history(
290                    &mut self.dictionary,
291                    last_commit,
292                    file_history,
293                    self.detailed,
294                );
295                Ok(Some(serde_json::value::to_value(stats).expect(
296                    "Serializable object couldn't be serialized to JSON",
297                ))) // TODO: maybe explicit error? Though this should be fatal
298            } else {
299                // probably outside date range
300                debug!("No git history found for file: {:?}", path);
301                Ok(None)
302            }
303        } else {
304            let git_path = path.join(".git");
305            if git_path.is_dir() {
306                match Repository::discover(path) {
307                    Ok(repository) => {
308                        let info = GitInfo::new(path, repository);
309                        Ok(Some(serde_json::value::to_value(info).expect(
310                            "Serializable object couldn't be serialized to JSON",
311                        )))
312                    }
313                    Err(e) => {
314                        warn!("Can't find git repository at {:?}, {}", path, e);
315                        Ok(None)
316                    }
317                }
318            } else {
319                Ok(None)
320            }
321        }
322    }
323
324    fn metadata(&self) -> Result<Option<Value>, Error> {
325        let dictionary = serde_json::value::to_value(&self.dictionary)
326            .expect("Serializable object couldn't be serialized to JSON");
327        Ok(Some(json!({ "users": dictionary })))
328    }
329}
330
#[cfg(test)]
mod test {
    use super::*;
    use crate::git_logger::{CommitChange, User};
    use pretty_assertions::assert_eq;

    lazy_static! {
        static ref USER_JO: User = User::new(None, Some("jo@smith.com"));
        static ref USER_X: User = User::new(None, Some("x@smith.com"));
        static ref USER_Y: User = User::new(Some("Why"), Some("y@smith.com"));
    }

    const ONE_DAY_IN_SECS: u64 = 60 * 60 * 24;
    /// Day of the first change in the fixture history.
    const FIRST_DAY: u64 = ONE_DAY_IN_SECS;
    /// Day of the second change in the fixture history.
    const SECOND_CHANGE_DAY: u64 = FIRST_DAY + 3 * ONE_DAY_IN_SECS;
    /// "Last commit" time handed to `stats_from_history`.
    const TODAY: u64 = FIRST_DAY + 5 * ONE_DAY_IN_SECS;

    /// Two changes to one file: one by Jo on the first day, and one three days later
    /// with X as the email user and Y as the author.
    fn two_change_history() -> Result<Vec<FileHistoryEntry>, Error> {
        let change_by_jo = FileHistoryEntryBuilder::test_default()
            .emails("jo@smith.com")
            .times(FIRST_DAY)
            .id("1111")
            .build()
            .map_err(failure::err_msg)?;
        let change_by_x_and_y = FileHistoryEntryBuilder::test_default()
            .emails("x@smith.com")
            .times(SECOND_CHANGE_DAY)
            .author(User::new(Some("Why"), Some("y@smith.com")))
            .id("2222")
            .build()
            .map_err(failure::err_msg)?;
        Ok(vec![change_by_jo, change_by_x_and_y])
    }

    /// A `GitHistories` with nothing loaded and default config.
    fn empty_histories() -> GitHistories {
        GitHistories {
            git_file_histories: Vec::new(),
            git_log_config: GitLogConfig::default(),
        }
    }

    #[test]
    fn gets_basic_stats_from_git_events() -> Result<(), Error> {
        let events = two_change_history()?;
        let histories = empty_histories();
        let mut dictionary = GitUserDictionary::new();

        let stats = histories.stats_from_history(&mut dictionary, TODAY, &events, false);

        assert_eq!(
            stats,
            Some(GitData {
                last_update: SECOND_CHANGE_DAY,
                age_in_days: 2,
                creation_date: Some(86400),
                user_count: 3,
                users: vec![0, 1, 2],
                details: None
            })
        );

        assert_eq!(dictionary.user_count(), 3);
        assert_eq!(dictionary.user_id(&USER_JO), Some(&0));
        assert_eq!(dictionary.user_id(&USER_X), Some(&1));
        assert_eq!(dictionary.user_id(&USER_Y), Some(&2));

        Ok(())
    }

    #[test]
    fn gets_detailed_stats_from_git_events() -> Result<(), Error> {
        let events = two_change_history()?;
        let histories = empty_histories();
        let mut dictionary = GitUserDictionary::new();

        let stats = histories.stats_from_history(&mut dictionary, TODAY, &events, true);

        let jo_set: HashSet<usize> = vec![0].into_iter().collect();
        let xy_set: HashSet<usize> = vec![1, 2].into_iter().collect();

        let expected_details: Option<Vec<GitDetails>> = Some(vec![
            GitDetails {
                commit_day: 86400,
                users: jo_set,
                commits: 1,
                lines_added: 0,
                lines_deleted: 0,
            },
            GitDetails {
                commit_day: 345600,
                users: xy_set,
                commits: 1,
                lines_added: 0,
                lines_deleted: 0,
            },
        ]);

        assert_eq!(
            stats,
            Some(GitData {
                last_update: SECOND_CHANGE_DAY,
                age_in_days: 2,
                creation_date: Some(86400),
                user_count: 3,
                users: vec![0, 1, 2],
                details: expected_details
            })
        );

        assert_eq!(dictionary.user_count(), 3);
        assert_eq!(dictionary.user_id(&USER_JO), Some(&0));
        assert_eq!(dictionary.user_id(&USER_X), Some(&1));
        assert_eq!(dictionary.user_id(&USER_Y), Some(&2));

        Ok(())
    }
}