git_historian/history.rs
1//! Builds a tree of Git history based on the stream of changes parsed from Git
2//!
3//! The basic algorithm is as follows: given a set of paths we care about and a
4//! series of commits (provided by the [parsing](../parsing/index.html) module),
5//! do the following for each commit:
6//!
7//! 1. Call the user-provided filter to see if the user cares about this commit.
8//! If they do, call the user-provided callback to extract information.
9//! The callback can use the data provided by `ParsedCommit`, or it
10//! can gather its own info using the commit's SHA1 ID and git commands.
11//! (The latter is, of course, much slower.)
12//!
13//! 2. Then, for each added/removed/changed/etc. file in the commit,
14//!
15//! - Create a new node representing the delta.
16//!
17//! - Connect it to previous nodes using the "pending edges" map
18//! (see the next step).
19//!
20//! - In a map of "pending edges", place an entry indicating what the file's
21//! name was before this change. If the change was a modification,
22//! the previous name is the same as the current one.
23//! If the change was a move or a copy, the previous name will be different.
24//! If the change was the addition of the file, there is no previous name
25//! to add.
26//!
27//! The net effect is that files' histories are tracked *through* name changes,
28//! a la `git log --follow`.
29//! Currently the act of renaming a file is considered a change, even though
30//! the actual contents haven't changed at all.
31//! (This seems to be consistent with `git log --follow`).
32//! If, in the future, this is not desired, we *do* track the amount a file has
33//! been changed during a rename, and could skip adding a node if no changes are
34//! made to the contents.
35
36use std::cell::RefCell;
37use std::collections::HashMap;
38use std::sync::mpsc::Receiver;
39use std::rc::Rc;
40
41use types::{Change, HistoryNode, HistoryTree, Link, PathSet};
42use parsing::ParsedCommit;
43
44
45/// All the fun state we need to hang onto while building up our history tree.
46/// Forgive the template param stew. All it's doing is allowing the user to
47/// use an arbitrary function `F` to filter commits, then use an arbitrary
48/// function `V` to gather arbitrary data `T` from each commit.
49struct HistoryState<'a, T, V, F>
50 where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool {
51 /// The tree we'll return
52 history: HistoryTree<T>,
53
54 /// History is generated by noting (from the diff type) what a file's
55 /// name was during its previous change.
56 /// Those edges (between HistoryNodes) are stored here, where
57 /// pending_edges[p] lists all nodes that should be connected to the next
58 /// node for path `p`.
59 pending_edges: HashMap<String, Vec<Link<HistoryNode<T>>>>,
60
61 /// Hold a reference to which paths we care about, for culling output.
62 path_set: &'a PathSet,
63
64 /// The user-provided callback that's issued for each diff,
65 /// returning info the user cares about.
66 visitor: V,
67
68 filter: F,
69}
70
71impl<'a, T, V, F> HistoryState<'a, T, V, F>
72 where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool {
73
74 fn new(set: &'a PathSet, vis: V, fil: F) -> HistoryState<T, V, F> {
75 let mut pending = HashMap::new();
76
77 // Due to the check at the start of append_commit(), we must insert
78 // entries into pending_edges so that we care about the first diff found
79 // for a given file.
80 for path in set {
81 pending.insert(path.clone(), Vec::new());
82 }
83
84 HistoryState{ history: HistoryTree::new(),
85 pending_edges: pending,
86 path_set: set,
87 visitor: vis,
88 filter: fil
89 }
90 }
91
92 fn new_node(&self, d: Option<Rc<T>>) -> Link<HistoryNode<T>> {
93 Rc::new(RefCell::new(HistoryNode{data: d, previous: None}))
94 }
95
96 /// Takes a given commit and appends its changes to the history tree
97 fn append_commit(&mut self, commit: &ParsedCommit) {
98
99 // For each commit, see if we care to extract data,
100 // and possibly do so.
101 let data = if (self.filter)(commit) {
102 Some(Rc::new((self.visitor)(commit)))
103 }
104 else {
105 None
106 };
107
108 for delta in &commit.deltas {
109
110 // If we have no edges leading to the next node for this path,
111 // skip to the next diff.
112 if !self.pending_edges.contains_key(&delta.path) {
113 continue;
114 }
115
116 // Each delta needs its own node, but it can share the data.
117 let new_node = self.new_node(data.clone());
118
119 // In all cases where we care about the given path,
120 // insert the new node and link its pending_edges to it.
121 self.append_node(&delta.path, new_node.clone());
122
123 match delta.change {
124 // If a file was modified, its next node is under the same path.
125 Change::Modified => {
126 self.pending_edges.entry(delta.path.clone())
127 .or_insert_with(Vec::new)
128 .push(new_node);
129 }
130
131 // If a file was added, it has no next node (that we care about).
132 // We also don't care about deletions. If a file is deleted,
133 // it didn't make it - at least in that form - to the present.
134 Change::Added |
135 Change::Deleted => { }
136
137 // If a file was moved or copied,
138 // its next node is under the old path.
139 Change::Copied{..} | // TODO: Use % changed
140 Change::Renamed{..} => {
141 self.pending_edges.entry(delta.from.clone())
142 .or_insert_with(Vec::new)
143 .push(new_node);
144 }
145 }
146 }
147 }
148
149 /// Uses `pending_edges` (via `build_edges()`) to link `node` into
150 /// the history tree.
151 fn append_node(&mut self, key: &str, node: Link<HistoryNode<T>>) {
152 self.build_edges(key, &node);
153
154 // If we don't have a node for this path yet, it's the top of the branch.
155 if !self.history.contains_key(key) && self.path_set.contains(key) {
156 self.history.insert(key.to_string(), node);
157 }
158 }
159
160 /// Connects older nodes to `link_to` based on `pending_edges`
161 fn build_edges(&mut self, for_path: &str, link_to: &Link<HistoryNode<T>>) {
162 let from_set = match self.pending_edges.remove(for_path) {
163 None => return, // Bail if there are no changes to link.
164 Some(to_link) => to_link
165 };
166
167 for l in from_set { // For each rename/copy of <key> to <l>,
168 assert!(l.borrow().previous.is_none());
169 l.borrow_mut().previous = Some(link_to.clone());
170 }
171 }
172}
173
174/// Traverses Git history, grabbing arbitrary data at each change for files
175/// in the given set
176///
177/// Changes are tracked *through* file copies and renames.
178/// See the module-level documentation for more info.
179pub fn gather_history<T, V, F>(paths: &PathSet, v: V, f: F,
180 commit_source: &Receiver<ParsedCommit>) -> HistoryTree<T>
181 where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool {
182 let mut state = HistoryState::new(paths, v, f);
183
184 // Start reading commits.
185
186 while let Ok(commit) = commit_source.recv() {
187 state.append_commit(&commit);
188 }
189
190 // We should have consumed all edges by now.
191 // ...but git log --name-status doesn't show the full path of subtree'd files.
192 // TODO: Make the system play well with git subtree.
193 /*
194 if !state.pending_edges.is_empty() {
195 println!("Still have edges for");
196 for key in state.pending_edges.keys() {
197 println!("\t{}", key);
198 }
199 }
200 */
201
202 state.history
203}