1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
//! Parses `git log --name-status` to generate a stream of file changes
//! in Git history
//!
//! Originally the [Rust bindings](https://crates.io/crates/git2) for
//! [libgit2](https://libgit2.github.com/) were used,
//! but there is currently no clean or efficient way for libgit2 to generate
//! diffs for merges (i.e. only the changes resulting from conflict resolution)
//! as Git does.

use std::io::{self, BufReader, BufRead};
use std::process::{Child, Command, Stdio};
use std::sync::mpsc::SyncSender;

use time::Timespec;

use types::*;

/// The subset of a commit's `git log` output that this module keeps
#[derive(Clone, Debug)]
pub struct ParsedCommit {
    /// Hash identifying the commit (parsed from the `%H` line of the log output)
    pub id: SHA1,
    /// Commit time as a Unix timestamp in whole seconds (parsed from the `%at`
    /// line of the log output; the nanosecond part is always set to zero)
    pub when: Timespec,
    /// One entry per name-status line that `git log` printed for this commit
    pub deltas: Vec<FileDelta>,
}

impl Default for ParsedCommit {
    fn default() -> ParsedCommit
    {
        ParsedCommit {
            id: SHA1::default(),
            when: Timespec::new(0, 0),
            deltas: Vec::new()
        }
    }
}

/// Spawns `git log` with the output format the parser in this module expects.
///
/// For every commit the process prints the commit hash (`%H`), then the Unix
/// timestamp (`%at`) on its own line, then the `--name-status` lines for the
/// files it touched. `-M` and `-C` ask Git to report renames and copies.
/// Standard output is piped so the caller can read it.
///
/// Returns the `io::Error` from `spawn` if the process could not be started.
fn start_history_process() -> Result<Child, io::Error> {
    let mut git = Command::new("git");
    git.args(&[
            "log",
            "--name-status",
            "-M",
            "-C",
            "--pretty=format:%H%n%at", // Commit hash, newline, unix time
        ])
        .stdout(Stdio::piped());

    git.spawn()
}

/// Parses the Git history and emits a series of `ParsedCommit`s
///
/// Spawns `git log` via `start_history_process` and reads its output line by
/// line. Each commit is expected to appear as a hash line, a timestamp line,
/// and then zero or more name-status lines; empty lines are skipped.
/// A commit is considered finished when the next hash line shows up
/// (or when the output ends).
///
/// The parsed commits are pushed to a `SyncSender`,
/// and are assumed to be consumed by another thread.
/// If `git log` prints nothing at all, nothing is sent.
///
/// # Panics
///
/// Panics if the `git log` process cannot be started, if a line of its output
/// cannot be read, if a hash, timestamp or delta line cannot be parsed,
/// or if the receiving end of `sink` has gone away.
pub fn get_history(sink: SyncSender<ParsedCommit>) {

    // Which piece of a commit record the next non-empty line should hold.
    enum ParseState {
        Hash,
        Timestamp,
        Changes
    }

    let mut child = start_history_process().expect("Couldn't open repo history");
    // `take()` instead of moving the field out, so that `child` stays intact
    // and can be waited on once its output has been consumed.
    let stdout = child.stdout.take()
        .expect("git log was started without a piped stdout");
    let br = BufReader::new(stdout);

    let mut state = ParseState::Hash;
    let mut current_commit = ParsedCommit::default();

    for line in br.lines().map(|l| l.unwrap()) {

        if line.is_empty() { continue; } // Blow through empty lines

        state = match state {
            ParseState::Hash => {
                current_commit.id = SHA1::parse(&line).unwrap();
                ParseState::Timestamp
            }

            ParseState::Timestamp => {
                current_commit.when = Timespec{ sec: line.parse().unwrap(),
                                                nsec: 0 };
                ParseState::Changes
            }

            ParseState::Changes => {
                // If we get the next hash, we're done with the previous commit.
                if let Ok(id) = SHA1::parse(&line) {
                    commit_sink(current_commit, &sink);
                    current_commit = ParsedCommit::default();

                    // We just got the OID of the next commit,
                    // so proceed to reading the timestamp
                    current_commit.id = id;
                    ParseState::Timestamp
                }
                else {
                    // Keep chomping deltas
                    current_commit.deltas.push(parse_delta(&line));
                    ParseState::Changes
                }
            }
        };
    }

    // The output is at EOF, so the process is finished (or about to be).
    // Reap it so it doesn't linger as a zombie. This is best-effort: every
    // line has already been read, so a failure to wait loses no data.
    let _ = child.wait();

    match state {
        // Still waiting for the very first hash: `git log` printed nothing,
        // so there is no commit to report (only the untouched placeholder).
        ParseState::Hash => { }

        // Grab the last commit.
        _ => commit_sink(current_commit, &sink)
    }
}

/// Hands a fully parsed commit to whoever is listening on `sink`.
///
/// # Panics
///
/// Panics if the send fails, i.e. the receiving side is no longer there.
#[inline]
fn commit_sink(c: ParsedCommit, sink: &SyncSender<ParsedCommit>) {
    let delivery = sink.send(c);
    delivery.expect("The other end stopped listening for commits.");
}

/// Parses a delta line generated by `git log --name-status`
///
/// The line is split on tabs. The first token is the change code (handed to
/// `parse_change_code`); what follows depends on the kind of change:
///
/// * renames and copies: `<code>\t<old path>\t<new path>`
/// * everything else:    `<code>\t<path>`
///
/// In the returned `FileDelta`, `path` is the current (new) path and `from`
/// is the old path for renames and copies, or an empty string otherwise.
///
/// # Panics
///
/// Panics if the line does not have the number of tab-separated tokens
/// its change code calls for, or if the change code itself cannot be parsed.
fn parse_delta(s: &str) -> FileDelta {
    let tokens : Vec<&str> = s.split('\t').collect();

    // Every delta line needs a change code plus at least one path.
    assert!(tokens.len() > 1,
            "Expected at least two tab-separated tokens from string {:?}", s);
    let c = parse_change_code(tokens[0]);

    let (previous, current) = match c {
        Change::Renamed { .. } |
        Change::Copied { .. } => {
            assert!(tokens.len() == 3, "Expected three tokens from string {:?}", s);
            (tokens[1].to_string(), tokens[2].to_string())
        }

        _ => {
            assert!(tokens.len() == 2, "Expected two tokens from string {:?}", s);
            // No source path for plain adds/deletes/modifications
            (String::new(), tokens[1].to_string())
        }
    };

    FileDelta{ change: c, path: current, from: previous }
}

/// Turns a `git log --name-status` change code into a `Change`.
///
/// Only the first character selects the kind of change: `A` added,
/// `D` deleted, `M` modified, `T` (type change) also counted as modified,
/// `R` renamed and `C` copied. For `R` and `C` the rest of the code is parsed
/// as a number and stored in `percent_changed`.
///
/// # Panics
///
/// Panics if `c` is empty, starts with any other character, carries a score
/// that does not parse as a number, or carries a score above 100.
fn parse_change_code(c: &str) -> Change {
    assert!(!c.is_empty());

    let kind = c.chars().next().unwrap();
    let change = match kind {
        'A' => Change::Added,
        'D' => Change::Deleted,
        // Let's consider a type change a modification.
        'M' | 'T' => Change::Modified,
        // Renames and copies carry a numeric score after the letter, e.g. "R87".
        // NOTE(review): Git documents this score as a similarity percentage,
        // while the field is called `percent_changed` — confirm the intended meaning.
        'R' => {
            let score = c[1..].parse().unwrap();
            Change::Renamed{ percent_changed: score }
        }
        'C' => {
            let score = c[1..].parse().unwrap();
            Change::Copied{ percent_changed: score }
        }
        _ => panic!("Unknown delta code: {}", c)
    };

    // A percentage can't exceed 100; anything larger means we misread the line.
    match change {
        Change::Renamed{ percent_changed: r } => assert!(r <= 100),
        Change::Copied{ percent_changed: c } => assert!(c <= 100),
        _ => { }
    }

    change
}