1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
//! Builds a tree of Git history based on the stream of changes parsed from Git //! //! The basic algorithm is as follows: given a set of paths we care about and a //! series of commits (provided by the [parsing](../parsing/index.html) module), //! do the following for each commit: //! //! 1. Call the user-provided filter to see if the user cares about this commit. //! If they do, call the user-provided callback to extract information. //! The callback can use the data provided by `ParsedCommit`, or it //! can gather its own info using the commit's SHA1 ID and git commands. //! (The latter is, of course, much slower.) //! //! 2. Then, for each added/removed/changed/etc. file in the commit, //! //! - Create a new node representing the delta. //! //! - Connect it to previous nodes using the "pending edges" map //! (see the next step). //! //! - In a map of "pending edges", place an entry indicating what the file's //! name was before this change. If the change was a modification, //! the previous name is the same as the current one. //! If the change was a move or a copy, the previous name will be different. //! If the change was the addition of the file, there is no previous name //! to add. //! //! The net effect is that files' histories are tracked *through* name changes, //! a la `git log --follow`. //! Currently the act of renaming a file is considered a change, even though //! the actual contents haven't changed at all. //! (This seems to be consistent with `git log --follow`). //! If, in the future, this is not desired, we *do* track the amount a file has //! been changed during a rename, and could skip adding a node if no changes are //! made to the contents. use std::cell::RefCell; use std::collections::HashMap; use std::sync::mpsc::Receiver; use std::rc::Rc; use types::{Change, HistoryNode, HistoryTree, Link, PathSet}; use parsing::ParsedCommit; /// All the fun state we need to hang onto while building up our history tree. /// Forgive the template param stew. All it's doing is allowing the user to /// use an arbitrary function `F` to filter commits, then use an arbitrary /// function `V` to gather arbitrary data `T` from each commit. struct HistoryState<'a, T, V, F> where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool { /// The tree we'll return history: HistoryTree<T>, /// History is generated by noting (from the diff type) what a file's /// name was during its previous change. /// Those edges (between HistoryNodes) are stored here, where /// pending_edges[p] lists all nodes that should be connected to the next /// node for path `p`. pending_edges: HashMap<String, Vec<Link<HistoryNode<T>>>>, /// Hold a reference to which paths we care about, for culling output. path_set: &'a PathSet, /// The user-provided callback that's issued for each diff, /// returning info the user cares about. visitor: V, filter: F, } impl<'a, T, V, F> HistoryState<'a, T, V, F> where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool { fn new(set: &'a PathSet, vis: V, fil: F) -> HistoryState<T, V, F> { let mut pending = HashMap::new(); // Due to the check at the start of append_commit(), we must insert // entries into pending_edges so that we care about the first diff found // for a given file. for path in set { pending.insert(path.clone(), Vec::new()); } HistoryState{ history: HistoryTree::new(), pending_edges: pending, path_set: set, visitor: vis, filter: fil } } fn new_node(&self, d: Option<Rc<T>>) -> Link<HistoryNode<T>> { Rc::new(RefCell::new(HistoryNode{data: d, previous: None})) } /// Takes a given commit and appends its changes to the history tree fn append_commit(&mut self, commit: &ParsedCommit) { // For each commit, see if we care to extract data, // and possibly do so. let data = if (self.filter)(commit) { Some(Rc::new((self.visitor)(commit))) } else { None }; for delta in &commit.deltas { // If we have no edges leading to the next node for this path, // skip to the next diff. if !self.pending_edges.contains_key(&delta.path) { continue; } // Each delta needs its own node, but it can share the data. let new_node = self.new_node(data.clone()); // In all cases where we care about the given path, // insert the new node and link its pending_edges to it. self.append_node(&delta.path, new_node.clone()); match delta.change { // If a file was modified, its next node is under the same path. Change::Modified => { self.pending_edges.entry(delta.path.clone()) .or_insert_with(Vec::new) .push(new_node); } // If a file was added, it has no next node (that we care about). // We also don't care about deletions. If a file is deleted, // it didn't make it - at least in that form - to the present. Change::Added | Change::Deleted => { } // If a file was moved or copied, // its next node is under the old path. Change::Copied{..} | // TODO: Use % changed Change::Renamed{..} => { self.pending_edges.entry(delta.from.clone()) .or_insert_with(Vec::new) .push(new_node); } } } } /// Uses `pending_edges` (via `build_edges()`) to link `node` into /// the history tree. fn append_node(&mut self, key: &str, node: Link<HistoryNode<T>>) { self.build_edges(key, &node); // If we don't have a node for this path yet, it's the top of the branch. if !self.history.contains_key(key) && self.path_set.contains(key) { self.history.insert(key.to_string(), node); } } /// Connects older nodes to `link_to` based on `pending_edges` fn build_edges(&mut self, for_path: &str, link_to: &Link<HistoryNode<T>>) { let from_set = match self.pending_edges.remove(for_path) { None => return, // Bail if there are no changes to link. Some(to_link) => to_link }; for l in from_set { // For each rename/copy of <key> to <l>, assert!(l.borrow().previous.is_none()); l.borrow_mut().previous = Some(link_to.clone()); } } } /// Traverses Git history, grabbing arbitrary data at each change for files /// in the given set /// /// Changes are tracked *through* file copies and renames. /// See the module-level documentation for more info. pub fn gather_history<T, V, F>(paths: &PathSet, v: V, f: F, commit_source: Receiver<ParsedCommit>) -> HistoryTree<T> where V: Fn(&ParsedCommit) -> T, F: Fn(&ParsedCommit) -> bool { let mut state = HistoryState::new(paths, v, f); // Start reading commits. while let Ok(commit) = commit_source.recv() { state.append_commit(&commit); } // We should have consumed all edges by now. // ...but git log --name-status doesn't show the full path of subtree'd files. // TODO: Make the system play well with git subtree. /* if !state.pending_edges.is_empty() { println!("Still have edges for"); for key in state.pending_edges.keys() { println!("\t{}", key); } } */ state.history }