git_historian/
parsing.rs

1//! Parses `git log --name-status` to generate a stream of file changes
2//! in Git history
3//!
//! Originally the [Rust bindings](https://crates.io/crates/git2) for
//! [libgit2](https://libgit2.github.com/) were used,
//! but there is currently no clean or efficient way for libgit2 to generate
//! diffs for merges (i.e. only the changes resulting from conflict resolution)
//! as Git does.
9
10use std::io::{self, BufReader, BufRead};
11use std::process::{Child, Command, Stdio};
12use std::sync::mpsc::SyncSender;
13
14use time::Timespec;
15
16use types::*;
17
18/// Info about a commit pulled from `git log` (or at least the bits we care about)
19#[derive(Debug, Clone)]
20pub struct ParsedCommit {
21    pub id: SHA1,
22    /// The Unix timestamp (in seconds) of the commit
23    pub when: Timespec,
24    pub deltas: Vec<FileDelta>,
25}
26
27impl Default for ParsedCommit {
28    fn default() -> ParsedCommit
29    {
30        ParsedCommit {
31            id: SHA1::default(),
32            when: Timespec::new(0, 0),
33            deltas: Vec::new()
34        }
35    }
36}
37
/// Spawns `git log`, configured to print what the parser in this module expects
///
/// For every commit Git prints its hash, then its Unix timestamp, then one
/// `--name-status` line per changed file. `-M` and `-C` ask Git to detect
/// renames and copies. Standard output is piped so the caller can read it.
///
/// Returns the spawned child, or the I/O error if `git` could not be started.
fn start_history_process() -> Result<Child, io::Error> {
    let log_args = [
        "log",
        "--name-status",
        "-M",
        "-C",
        "--pretty=format:%H%n%at", // Commit hash, newline, unix time
    ];

    let mut git = Command::new("git");
    git.args(&log_args).stdout(Stdio::piped());
    git.spawn()
}
49
50/// Parses the Git history and emits a series of `ParsedCommits`
51///
52/// The parsed commits are pushed to a `SyncSender`,
53/// and are assumed to be consumed by another thread.
54pub fn get_history(sink: &SyncSender<ParsedCommit>) {
55
56    enum ParseState { // Used for the state machine below
57        Hash,
58        Timestamp,
59        Changes
60    }
61
62    let child = start_history_process().expect("Couldn't open repo history");
63    let br = BufReader::new(child.stdout.unwrap());
64
65    let mut state = ParseState::Hash;
66    let mut current_commit = ParsedCommit::default();
67
68    for line in br.lines().map(|l| l.unwrap()) {
69
70        if line.is_empty() { continue; } // Blow through empty lines
71
72        let next_state;
73        match state {
74            ParseState::Hash => {
75                current_commit.id = SHA1::parse(&line).unwrap();
76                next_state = ParseState::Timestamp;
77            }
78
79            ParseState::Timestamp => {
80                current_commit.when = Timespec{ sec: line.parse().unwrap(),
81                                                nsec: 0 };
82                next_state = ParseState::Changes;
83            }
84
85            ParseState::Changes => {
86                // If we get the next hash, we're done with the previous commit.
87                if let Ok(id) = SHA1::parse(&line) {
88                    commit_sink(current_commit, sink);
89                    current_commit = ParsedCommit::default();
90
91                    // We just got the OID of the next commit,
92                    // so proceed to reading the timestamp
93                    current_commit.id = id;
94                    next_state = ParseState::Timestamp;
95                }
96                else {
97                    // Keep chomping deltas
98                    next_state = state;
99
100                    current_commit.deltas.push(parse_delta(&line));
101                }
102            }
103        }
104        state = next_state;
105    }
106
107    // Grab the last commit.
108    commit_sink(current_commit, sink);
109}
110
111/// Sends a commit when the state machine is done parsing it.
112#[inline]
113fn commit_sink(c: ParsedCommit, sink: &SyncSender<ParsedCommit>) {
114    sink.send(c).expect("The other end stopped listening for commits.");
115}
116
117/// Parses a delta line generated by `git log --name-status`
118fn parse_delta(s: &str) -> FileDelta {
119    let tokens : Vec<&str> = s.split('\t').collect();
120
121    assert!(tokens.len() > 1, "Expected at least one token");
122    let c = parse_change_code(tokens[0]);
123    let previous : String;
124    let current : String;
125
126    match c {
127        Change::Renamed { .. } |
128        Change::Copied { .. }=> {
129            assert_eq!(tokens.len(), 3, "Expected three tokens from string {:?}", s);
130            current = tokens[2].to_string();
131            previous = tokens[1].to_string();
132        }
133
134        _ => {
135            assert_eq!(tokens.len(), 2, "Expected two tokens from string {:?}", s);
136            current = tokens[1].to_string();
137            previous = String::new();
138        }
139    };
140
141    FileDelta{ change: c, path: current, from: previous }
142}
143
144/// Parses the change code generated by `git log --name-status`
145fn parse_change_code(c: &str) -> Change {
146    assert!(!c.is_empty());
147    let ret = match c.chars().nth(0).unwrap() {
148        'A' => Change::Added,
149        'D' => Change::Deleted,
150        'M' |
151        'T' => Change::Modified, // Let's consider a type change a modification.
152        // Renames and copies are suffixed with a percent changed, e.g. "R87"
153        'R' => Change::Renamed{ percent_changed: c[1..].parse().unwrap() },
154        'C' => Change::Copied{ percent_changed: c[1..].parse().unwrap() },
155        _ => panic!("Unknown delta code: {}", c)
156    };
157
158    // Sanity check percent_changed values for renames and copies
159    match ret {
160        Change::Renamed{ percent_changed: r}  => { assert!(r <= 100); },
161        Change::Copied{ percent_changed: c} => { assert!(c <= 100); },
162        _ => { }
163    };
164
165    ret
166}