git_historian/
history.rs

1//! Builds a tree of Git history based on the stream of changes parsed from Git
2//!
3//! The basic algorithm is as follows: given a set of paths we care about and a
4//! series of commits (provided by the [parsing](../parsing/index.html) module),
5//! do the following for each commit:
6//!
7//! 1. Call the user-provided filter to see if the user cares about this commit.
8//!    If they do, call the user-provided callback to extract information.
9//!    The callback can use the data provided by `ParsedCommit`, or it
10//!    can gather its own info using the commit's SHA1 ID and git commands.
11//!    (The latter is, of course, much slower.)
12//!
13//! 2. Then, for each added/removed/changed/etc. file in the commit,
14//!
15//!    - Create a new node representing the delta.
16//!
17//!    - Connect it to previous nodes using the "pending edges" map
18//!      (see the next step).
19//!
20//!    - In a map of "pending edges", place an entry indicating what the file's
21//!      name was before this change. If the change was a modification,
22//!      the previous name is the same as the current one.
23//!      If the change was a move or a copy, the previous name will be different.
24//!      If the change was the addition of the file, there is no previous name
25//!      to add.
26//!
27//! The net effect is that files' histories are tracked *through* name changes,
28//! a la `git log --follow`.
29//! Currently the act of renaming a file is considered a change, even though
30//! the actual contents haven't changed at all.
31//! (This seems to be consistent with `git log --follow`).
32//! If, in the future, this is not desired, we *do* track the amount a file has
33//! been changed during a rename, and could skip adding a node if no changes are
34//! made to the contents.
35
36use std::cell::RefCell;
37use std::collections::HashMap;
38use std::sync::mpsc::Receiver;
39use std::rc::Rc;
40
41use types::{Change, HistoryNode, HistoryTree, Link, PathSet};
42use parsing::ParsedCommit;
43
44
45/// All the fun state we need to hang onto while building up our history tree.
46/// Forgive the template param stew. All it's doing is allowing the user to
47/// use an arbitrary function `F` to filter commits, then use an arbitrary
48/// function `V` to gather arbitrary data `T` from each commit.
49struct HistoryState<'a, T, V, F>
50    where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool {
51    /// The tree we'll return
52    history: HistoryTree<T>,
53
54    /// History is generated by noting (from the diff type) what a file's
55    /// name was during its previous change.
56    /// Those edges (between HistoryNodes) are stored here, where
57    /// pending_edges[p] lists all nodes that should be connected to the next
58    /// node for path `p`.
59    pending_edges: HashMap<String, Vec<Link<HistoryNode<T>>>>,
60
61    /// Hold a reference to which paths we care about, for culling output.
62    path_set: &'a PathSet,
63
64    /// The user-provided callback that's issued for each diff,
65    /// returning info the user cares about.
66    visitor: V,
67
68    filter: F,
69}
70
71impl<'a, T, V, F> HistoryState<'a, T, V, F>
72    where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool {
73
74    fn new(set: &'a PathSet, vis: V, fil: F) -> HistoryState<T, V, F> {
75        let mut pending = HashMap::new();
76
77        // Due to the check at the start of append_commit(), we must insert
78        // entries into pending_edges so that we care about the first diff found
79        // for a given file.
80        for path in set {
81            pending.insert(path.clone(), Vec::new());
82        }
83
84        HistoryState{ history: HistoryTree::new(),
85                      pending_edges: pending,
86                      path_set: set,
87                      visitor: vis,
88                      filter: fil
89                    }
90    }
91
92    fn new_node(&self, d: Option<Rc<T>>) -> Link<HistoryNode<T>> {
93        Rc::new(RefCell::new(HistoryNode{data: d, previous: None}))
94    }
95
96    /// Takes a given commit and appends its changes to the history tree
97    fn append_commit(&mut self, commit: &ParsedCommit) {
98
99        // For each commit, see if we care to extract data,
100        // and possibly do so.
101        let data = if (self.filter)(commit) {
102                Some(Rc::new((self.visitor)(commit)))
103            }
104            else {
105                None
106            };
107
108        for delta in &commit.deltas {
109
110            // If we have no edges leading to the next node for this path,
111            // skip to the next diff.
112            if !self.pending_edges.contains_key(&delta.path) {
113                continue;
114            }
115
116            // Each delta needs its own node, but it can share the data.
117            let new_node = self.new_node(data.clone());
118
119            // In all cases where we care about the given path,
120            // insert the new node and link its pending_edges to it.
121            self.append_node(&delta.path, new_node.clone());
122
123            match delta.change {
124                // If a file was modified, its next node is under the same path.
125                Change::Modified => {
126                    self.pending_edges.entry(delta.path.clone())
127                        .or_insert_with(Vec::new)
128                        .push(new_node);
129                }
130
131                // If a file was added, it has no next node (that we care about).
132                // We also don't care about deletions. If a file is deleted,
133                // it didn't make it - at least in that form - to the present.
134                Change::Added |
135                Change::Deleted => { }
136
137                // If a file was moved or copied,
138                // its next node is under the old path.
139                Change::Copied{..} | // TODO: Use % changed
140                Change::Renamed{..} => {
141                    self.pending_edges.entry(delta.from.clone())
142                        .or_insert_with(Vec::new)
143                        .push(new_node);
144                }
145            }
146        }
147    }
148
149    /// Uses `pending_edges` (via `build_edges()`) to link `node` into
150    /// the history tree.
151    fn append_node(&mut self, key: &str, node: Link<HistoryNode<T>>) {
152        self.build_edges(key, &node);
153
154        // If we don't have a node for this path yet, it's the top of the branch.
155        if !self.history.contains_key(key) && self.path_set.contains(key) {
156            self.history.insert(key.to_string(), node);
157        }
158    }
159
160    /// Connects older nodes to `link_to` based on `pending_edges`
161    fn build_edges(&mut self, for_path: &str, link_to: &Link<HistoryNode<T>>) {
162        let from_set = match self.pending_edges.remove(for_path) {
163                None => return, // Bail if there are no changes to link.
164                Some(to_link) => to_link
165            };
166
167        for l in from_set { // For each rename/copy of <key> to <l>,
168            assert!(l.borrow().previous.is_none());
169            l.borrow_mut().previous = Some(link_to.clone());
170        }
171    }
172}
173
174/// Traverses Git history, grabbing arbitrary data at each change for files
175/// in the given set
176///
177/// Changes are tracked *through* file copies and renames.
178/// See the module-level documentation for more info.
179pub fn gather_history<T, V, F>(paths: &PathSet, v: V, f: F,
180                               commit_source: &Receiver<ParsedCommit>) -> HistoryTree<T>
181    where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool {
182    let mut state = HistoryState::new(paths, v, f);
183
184    // Start reading commits.
185
186    while let Ok(commit) = commit_source.recv() {
187        state.append_commit(&commit);
188    }
189
190    // We should have consumed all edges by now.
191    // ...but git log --name-status doesn't show the full path of subtree'd files.
192    // TODO: Make the system play well with git subtree.
193    /*
194    if !state.pending_edges.is_empty() {
195        println!("Still have edges for");
196        for key in state.pending_edges.keys() {
197            println!("\t{}", key);
198        }
199    }
200    */
201
202    state.history
203}