1use std::collections::HashMap;
4use std::collections::HashSet;
5use std::iter::FromIterator;
6
7use itertools::Itertools;
8
9fn get_changed_paths_between_trees_internal(
12 repo: &git2::Repository,
13 acc: &mut Vec<Vec<std::path::PathBuf>>,
14 current_path: &[std::path::PathBuf],
15 lhs: Option<&git2::Tree<'_>>,
16 rhs: Option<&git2::Tree<'_>>,
17) -> Result<(), git2::Error> {
18 let lhs_entries = lhs
19 .map(|tree| tree.iter().collect_vec())
20 .unwrap_or_default();
21 let lhs_entries: HashMap<&[u8], &git2::TreeEntry<'_>> = lhs_entries
22 .iter()
23 .map(|entry| (entry.name_bytes(), entry))
24 .collect();
25
26 let rhs_entries = rhs
27 .map(|tree| tree.iter().collect_vec())
28 .unwrap_or_default();
29 let rhs_entries: HashMap<&[u8], &git2::TreeEntry<'_>> = rhs_entries
30 .iter()
31 .map(|entry| (entry.name_bytes(), entry))
32 .collect();
33
34 let all_entry_names: HashSet<&[u8]> = lhs_entries
35 .keys()
36 .chain(rhs_entries.keys())
37 .cloned()
38 .collect();
39 let entries: HashMap<&[u8], (Option<&git2::TreeEntry<'_>>, Option<&git2::TreeEntry<'_>>)> =
40 all_entry_names
41 .into_iter()
42 .map(|entry_name| {
43 (
44 entry_name,
45 (
46 lhs_entries.get(entry_name).copied(),
47 rhs_entries.get(entry_name).copied(),
48 ),
49 )
50 })
51 .collect();
52
53 for (entry_name, (lhs_entry, rhs_entry)) in entries {
54 enum ClassifiedEntry {
55 Absent,
56 NotATree(git2::Oid, i32),
57 Tree(git2::Oid, i32),
58 }
59
60 fn classify_entry(
61 entry: Option<&git2::TreeEntry<'_>>,
62 ) -> Result<ClassifiedEntry, git2::Error> {
63 let entry = match entry {
64 Some(entry) => entry,
65 None => return Ok(ClassifiedEntry::Absent),
66 };
67
68 let file_mode = entry.filemode_raw();
69 match entry.kind() {
70 Some(git2::ObjectType::Tree) => Ok(ClassifiedEntry::Tree(entry.id(), file_mode)),
71 _ => Ok(ClassifiedEntry::NotATree(entry.id(), file_mode)),
72 }
73 }
74
75 let get_tree = |oid| repo.find_tree(oid);
76
77 let full_entry_path = || -> Vec<std::path::PathBuf> {
78 let entry_path = crate::bytes::bytes2path(entry_name);
79 let mut full_entry_path = current_path.to_vec();
80 full_entry_path.push(entry_path.to_owned());
81 full_entry_path
82 };
83 match (classify_entry(lhs_entry)?, classify_entry(rhs_entry)?) {
84 (ClassifiedEntry::Absent, ClassifiedEntry::Absent) => {
85 }
87
88 (
89 ClassifiedEntry::NotATree(lhs_oid, lhs_file_mode),
90 ClassifiedEntry::NotATree(rhs_oid, rhs_file_mode),
91 ) => {
92 if lhs_oid == rhs_oid && lhs_file_mode == rhs_file_mode {
93 } else {
95 acc.push(full_entry_path());
97 }
98 }
99
100 (ClassifiedEntry::Absent, ClassifiedEntry::NotATree(_, _))
101 | (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Absent) => {
102 acc.push(full_entry_path());
104 }
105
106 (ClassifiedEntry::Absent, ClassifiedEntry::Tree(tree_oid, _))
107 | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::Absent) => {
108 let full_entry_path = full_entry_path();
111 let tree = get_tree(tree_oid)?;
112 get_changed_paths_between_trees_internal(
113 repo,
114 acc,
115 &full_entry_path,
116 Some(&tree),
117 None,
118 )?;
119 }
120
121 (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Tree(tree_oid, _))
122 | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::NotATree(_, _)) => {
123 let full_entry_path = full_entry_path();
126 let tree = get_tree(tree_oid)?;
127 get_changed_paths_between_trees_internal(
128 repo,
129 acc,
130 &full_entry_path,
131 Some(&tree),
132 None,
133 )?;
134 acc.push(full_entry_path);
135 }
136
137 (
138 ClassifiedEntry::Tree(lhs_tree_oid, lhs_file_mode),
139 ClassifiedEntry::Tree(rhs_tree_oid, rhs_file_mode),
140 ) => {
141 match (
142 (lhs_tree_oid == rhs_tree_oid),
143 (lhs_file_mode == rhs_file_mode),
149 ) {
150 (true, true) => {
151 }
153
154 (true, false) => {
155 acc.push(full_entry_path());
157 }
158
159 (false, true) => {
160 let lhs_tree = get_tree(lhs_tree_oid)?;
161 let rhs_tree = get_tree(rhs_tree_oid)?;
162
163 get_changed_paths_between_trees_internal(
166 repo,
167 acc,
168 &full_entry_path(),
169 Some(&lhs_tree),
170 Some(&rhs_tree),
171 )?;
172 }
173
174 (false, false) => {
175 let lhs_tree = get_tree(lhs_tree_oid)?;
176 let rhs_tree = get_tree(rhs_tree_oid)?;
177 let full_entry_path = full_entry_path();
178
179 get_changed_paths_between_trees_internal(
180 repo,
181 acc,
182 &full_entry_path,
183 Some(&lhs_tree),
184 Some(&rhs_tree),
185 )?;
186 acc.push(full_entry_path);
187 }
188 }
189 }
190 }
191 }
192
193 Ok(())
194}
195
196pub fn get_changed_paths_between_trees(
197 repo: &git2::Repository,
198 lhs: Option<&git2::Tree<'_>>,
199 rhs: Option<&git2::Tree<'_>>,
200) -> Result<HashSet<std::path::PathBuf>, git2::Error> {
201 let mut acc = Vec::new();
202 get_changed_paths_between_trees_internal(repo, &mut acc, &Vec::new(), lhs, rhs)?;
203 let changed_paths: HashSet<_> = acc.into_iter().map(std::path::PathBuf::from_iter).collect();
204 Ok(changed_paths)
205}
206
207pub fn rebuild_tree<'r>(
224 repo: &'r git2::Repository,
225 tree: Option<&git2::Tree<'r>>,
226 entries: HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>>,
227) -> Result<git2::Oid, git2::Error> {
228 let (file_entries, dir_entries) = {
229 let mut file_entries: HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>> =
230 HashMap::new();
231 let mut dir_entries: HashMap<
232 std::path::PathBuf,
233 HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>>,
234 > = HashMap::new();
235 for (path, value) in entries {
236 match path.components().collect_vec().as_slice() {
237 [] => {
238 log::trace!("Empty path when hydrating tree");
239 }
240 [file_name] => {
241 file_entries.insert(file_name.into(), value);
242 }
243 components => {
244 let first: std::path::PathBuf = [components[0]].iter().collect();
245 let rest: std::path::PathBuf = components[1..].iter().collect();
246 dir_entries.entry(first).or_default().insert(rest, value);
247 }
248 }
249 }
250 (file_entries, dir_entries)
251 };
252
253 let mut builder = repo.treebuilder(tree)?;
254 for (file_name, file_value) in file_entries {
255 match file_value {
256 Some((oid, file_mode)) => {
257 builder.insert(&file_name, oid, file_mode)?;
258 }
259 None => {
260 remove_entry_if_exists(&mut builder, &file_name)?;
261 }
262 }
263 }
264
265 for (dir_name, dir_value) in dir_entries {
266 let existing_dir_entry: Option<git2::Tree<'_>> = match builder.get(&dir_name)? {
267 Some(existing_dir_entry)
268 if !existing_dir_entry.id().is_zero()
269 && existing_dir_entry.kind() == Some(git2::ObjectType::Tree) =>
270 {
271 Some(repo.find_tree(existing_dir_entry.id())?)
272 }
273 _ => None,
274 };
275 let new_entry_oid = rebuild_tree(repo, existing_dir_entry.as_ref(), dir_value)?;
276
277 let new_entry_tree = repo.find_tree(new_entry_oid)?;
278 if new_entry_tree.is_empty() {
279 remove_entry_if_exists(&mut builder, &dir_name)?;
280 } else {
281 builder.insert(&dir_name, new_entry_oid, git2::FileMode::Tree.into())?;
282 }
283 }
284
285 let tree_oid = builder.write()?;
286 Ok(tree_oid)
287}
288
289fn remove_entry_if_exists(
293 builder: &mut git2::TreeBuilder<'_>,
294 name: &std::path::Path,
295) -> Result<(), git2::Error> {
296 if builder.get(name)?.is_some() {
297 builder.remove(name)?;
298 }
299 Ok(())
300}
301
302pub fn filter_tree<'r>(
306 repo: &'r git2::Repository,
307 tree: &git2::Tree<'r>,
308 paths: &[&std::path::Path],
309) -> Result<git2::Oid, git2::Error> {
310 let entries: HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>> = paths
311 .iter()
312 .map(|path| -> Result<(std::path::PathBuf, _), git2::Error> {
313 let key = path.to_path_buf();
314 match tree.get_path(path) {
315 Ok(tree_entry) => {
316 let value = Some((tree_entry.id(), tree_entry.filemode()));
317 Ok((key, value))
318 }
319 Err(err) if err.code() == git2::ErrorCode::NotFound => Ok((key, None)),
320 Err(err) => Err(err),
321 }
322 })
323 .try_collect()?;
324
325 rebuild_tree(repo, None, entries)
326}
327
328#[cfg(test)]
329mod tests {
330 use snapbox::assert_data_eq;
331 use snapbox::prelude::*;
332 use snapbox::str;
333
334 use super::*;
335
336 use crate::testing::make_git;
337
338 fn dump_tree_entries(tree: &git2::Tree<'_>) -> String {
339 #[allow(clippy::format_collect)]
340 tree.iter()
341 .map(|entry| format!("{:?} {:?}\n", entry.name().unwrap(), entry.id()))
342 .collect()
343 }
344
345 #[test]
346 fn test_rebuild_tree() -> eyre::Result<()> {
347 let git = make_git()?;
348
349 git.init_repo()?;
350
351 git.write_file("foo", "foo")?;
352 git.write_file("bar/bar", "bar")?;
353 git.write_file("bar/baz", "qux")?;
354 git.write_file("xyzzy", "xyzzy")?;
355 git.run(&["add", "."])?;
356 git.run(&["commit", "-m", "commit"])?;
357
358 let repo = git.get_repo()?;
359 let head_oid = repo.head()?.target().unwrap();
360 let head_commit = repo.find_commit(head_oid)?;
361 let head_tree = head_commit.tree()?;
362
363 assert_data_eq!(
364 dump_tree_entries(&head_tree),
365 str![[r#"
366"bar" 778e23a1e80b1feb10e00b15b29a33315929c5b5
367"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
368"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
369"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
370
371"#]]
372 );
373
374 {
375 let hydrated_tree = {
376 let hydrated_tree_oid = rebuild_tree(&repo, Some(&head_tree), {
377 let mut result = HashMap::new();
378 result.insert(
379 std::path::PathBuf::from("foo-copy.txt"),
380 Some((
381 head_tree
382 .get_path(&std::path::PathBuf::from("foo.txt"))?
383 .id(),
384 0o100644,
385 )),
386 );
387 result.insert(std::path::PathBuf::from("foo.txt"), None);
388 result
389 })?;
390 repo.find_tree(hydrated_tree_oid)?
391 };
392 assert_data_eq!(
393 dump_tree_entries(&hydrated_tree),
394 str![[r#"
395"bar" 778e23a1e80b1feb10e00b15b29a33315929c5b5
396"foo-copy.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
397"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
398"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
399
400"#]]
401 );
402 }
403
404 {
405 let hydrated_tree = {
406 let hydrated_tree_oid = rebuild_tree(&repo, Some(&head_tree), {
407 let mut result = HashMap::new();
408 result.insert(std::path::PathBuf::from("bar/bar.txt"), None);
409 result
410 })?;
411 repo.find_tree(hydrated_tree_oid)?
412 };
413 assert_data_eq!(
414 dump_tree_entries(&hydrated_tree),
415 str![[r#"
416"bar" 08ee88e1c53fbd01ab76f136a4f2c9d759b981d0
417"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
418"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
419"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
420
421"#]]
422 );
423 }
424
425 {
426 let hydrated_tree = {
427 let hydrated_tree_oid = rebuild_tree(&repo, Some(&head_tree), {
428 let mut result = HashMap::new();
429 result.insert(std::path::PathBuf::from("bar/bar.txt"), None);
430 result.insert(std::path::PathBuf::from("bar/baz.txt"), None);
431 result
432 })?;
433 repo.find_tree(hydrated_tree_oid)?
434 };
435 assert_data_eq!(
436 dump_tree_entries(&hydrated_tree),
437 str![[r#"
438"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
439"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
440"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
441
442"#]]
443 );
444 }
445
446 {
447 let dehydrated_tree_oid = filter_tree(
448 &repo,
449 &head_tree,
450 &[
451 std::path::Path::new("bar/baz.txt"),
452 std::path::Path::new("foo.txt"),
453 ],
454 )?;
455 let dehydrated_tree = repo.find_tree(dehydrated_tree_oid)?;
456 assert_data_eq!(
457 dump_tree_entries(&dehydrated_tree),
458 str![[r#"
459"bar" 08ee88e1c53fbd01ab76f136a4f2c9d759b981d0
460"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
461
462"#]]
463 );
464 }
465
466 Ok(())
467 }
468
469 #[test]
470 fn test_detect_path_only_changed_file_mode() -> eyre::Result<()> {
471 let git = make_git()?;
472 git.init_repo()?;
473
474 git.run(&["update-index", "--chmod=+x", "initial.txt"])?;
475 git.run(&["commit", "-m", "update file mode"])?;
476
477 let repo = git.get_repo()?;
478 let oid = repo.head()?.target().unwrap();
479 let commit = repo.find_commit(oid)?;
480
481 let lhs = commit.parent(0).unwrap();
482 let lhs_tree = lhs.tree()?;
483 let rhs_tree = commit.tree()?;
484 let changed_paths =
485 get_changed_paths_between_trees(&repo, Some(&lhs_tree), Some(&rhs_tree))?;
486 let mut changed_paths = changed_paths
487 .into_iter()
488 .map(|p| p.display().to_string())
489 .collect::<Vec<_>>();
490 changed_paths.sort();
491
492 assert_data_eq!(
493 changed_paths.to_debug(),
494 str![[r#"
495[
496 "initial.txt",
497]
498
499"#]]
500 );
501
502 Ok(())
503 }
504}