Skip to main content

grit_lib/
write_tree.rs

1//! Build tree objects from index entries (`git write-tree` core logic).
2
3use std::collections::BTreeMap;
4
5use crate::error::Result;
6use crate::index::{
7    CacheTreeNode, Index, IndexEntry, MODE_EXECUTABLE, MODE_GITLINK, MODE_REGULAR, MODE_SYMLINK,
8    MODE_TREE,
9};
10use crate::objects::{parse_tree, serialize_tree, tree_entry_cmp, ObjectId, ObjectKind, TreeEntry};
11use crate::odb::Odb;
12
13fn ensure_empty_blob_for_intent_to_add(odb: &Odb, index: &Index) -> Result<()> {
14    if index
15        .entries
16        .iter()
17        .any(|e| e.stage() == 0 && e.intent_to_add())
18    {
19        let _ = odb.write(ObjectKind::Blob, b"")?;
20    }
21    Ok(())
22}
23
24/// Build and write tree object(s) from index entries and return the tree OID.
25///
26/// The `prefix` argument optionally limits the write to a subtree path.
27/// Like [`write_tree_from_index`], but only index entries whose path is listed in `paths`
28/// (repository-relative, as stored in the index) are included in the tree.
29pub fn write_tree_from_index_subset(
30    odb: &Odb,
31    index: &Index,
32    paths: &std::collections::HashSet<Vec<u8>>,
33) -> Result<ObjectId> {
34    ensure_empty_blob_for_intent_to_add(odb, index)?;
35
36    let mut entries: Vec<&IndexEntry> = index
37        .entries
38        .iter()
39        .filter(|entry| {
40            entry.stage() == 0
41                && !entry.intent_to_add()
42                && entry.mode != MODE_TREE
43                && paths.contains(&entry.path)
44        })
45        .collect();
46    entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
47    build_tree(odb, &entries, b"")
48}
49
50/// Build and write tree object(s) from index entries and return the tree OID.
51pub fn write_tree_from_index(odb: &Odb, index: &Index, prefix: &str) -> Result<ObjectId> {
52    ensure_empty_blob_for_intent_to_add(odb, index)?;
53
54    let prefix_bytes = prefix.as_bytes();
55    let mut entries: Vec<&IndexEntry> = index
56        .entries
57        .iter()
58        .filter(|entry| {
59            entry.stage() == 0
60                && !entry.intent_to_add()
61                && entry.mode != MODE_TREE
62                && entry.path.starts_with(prefix_bytes)
63        })
64        .collect();
65    entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
66    build_tree(odb, &entries, prefix_bytes)
67}
68
69/// Build a valid cache-tree extension from the index and write any missing tree objects.
70///
71/// # Errors
72///
73/// Returns an error if tree object creation fails.
74pub fn build_cache_tree_from_index(odb: &Odb, index: &Index) -> Result<CacheTreeNode> {
75    ensure_empty_blob_for_intent_to_add(odb, index)?;
76    let mut entries: Vec<&IndexEntry> = index
77        .entries
78        .iter()
79        .filter(|entry| entry.stage() == 0 && !entry.intent_to_add() && entry.mode != MODE_TREE)
80        .collect();
81    entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
82    build_cache_tree_node(odb, b"", Vec::new(), &entries)
83}
84
85fn build_tree(odb: &Odb, entries: &[&IndexEntry], dir_prefix: &[u8]) -> Result<ObjectId> {
86    let mut children: BTreeMap<Vec<u8>, ChildKind> = BTreeMap::new();
87
88    for entry in entries {
89        let path = &entry.path;
90        let rel = if dir_prefix.is_empty() {
91            path.as_slice()
92        } else {
93            path.strip_prefix(dir_prefix)
94                .and_then(|suffix| suffix.strip_prefix(b"/"))
95                .unwrap_or(path.as_slice())
96        };
97
98        if let Some(slash_pos) = rel.iter().position(|&byte| byte == b'/') {
99            let child_name = rel[..slash_pos].to_vec();
100            let sub_prefix = if dir_prefix.is_empty() {
101                child_name.clone()
102            } else {
103                let mut sub_prefix = dir_prefix.to_vec();
104                sub_prefix.push(b'/');
105                sub_prefix.extend_from_slice(&child_name);
106                sub_prefix
107            };
108            children
109                .entry(child_name)
110                .or_insert_with(|| ChildKind::Tree(sub_prefix, Vec::new()))
111                .push_entry(entry);
112        } else {
113            children
114                .entry(rel.to_vec())
115                .or_insert_with(|| ChildKind::Blob {
116                    mode: canonicalize_blob_mode(entry.mode),
117                    oid: entry.oid,
118                });
119        }
120    }
121
122    let mut tree_entries = Vec::with_capacity(children.len());
123    for (name, child) in children {
124        match child {
125            ChildKind::Blob { mode, oid } => tree_entries.push(TreeEntry { mode, name, oid }),
126            ChildKind::Tree(sub_prefix, sub_entries) => {
127                let sub_oid = build_tree(odb, &sub_entries, &sub_prefix)?;
128                tree_entries.push(TreeEntry {
129                    mode: MODE_TREE,
130                    name,
131                    oid: sub_oid,
132                });
133            }
134        }
135    }
136
137    tree_entries.sort_by(|a, b| {
138        let a_tree = a.mode == MODE_TREE;
139        let b_tree = b.mode == MODE_TREE;
140        tree_entry_cmp(&a.name, a_tree, &b.name, b_tree)
141    });
142
143    let data = serialize_tree(&tree_entries);
144    freshen_tree_entries(odb, &tree_entries);
145    odb.write(ObjectKind::Tree, &data)
146}
147
148fn build_cache_tree_node(
149    odb: &Odb,
150    dir_prefix: &[u8],
151    name: Vec<u8>,
152    entries: &[&IndexEntry],
153) -> Result<CacheTreeNode> {
154    let mut children: BTreeMap<Vec<u8>, ChildKind> = BTreeMap::new();
155
156    for entry in entries {
157        let path = &entry.path;
158        let rel = if dir_prefix.is_empty() {
159            path.as_slice()
160        } else {
161            path.strip_prefix(dir_prefix)
162                .and_then(|suffix| suffix.strip_prefix(b"/"))
163                .unwrap_or(path.as_slice())
164        };
165
166        if let Some(slash_pos) = rel.iter().position(|&byte| byte == b'/') {
167            let child_name = rel[..slash_pos].to_vec();
168            let sub_prefix = if dir_prefix.is_empty() {
169                child_name.clone()
170            } else {
171                let mut sub_prefix = dir_prefix.to_vec();
172                sub_prefix.push(b'/');
173                sub_prefix.extend_from_slice(&child_name);
174                sub_prefix
175            };
176            children
177                .entry(child_name)
178                .or_insert_with(|| ChildKind::Tree(sub_prefix, Vec::new()))
179                .push_entry(entry);
180        } else {
181            children
182                .entry(rel.to_vec())
183                .or_insert_with(|| ChildKind::Blob {
184                    mode: canonicalize_blob_mode(entry.mode),
185                    oid: entry.oid,
186                });
187        }
188    }
189
190    let mut tree_entries = Vec::with_capacity(children.len());
191    let mut cache_children = Vec::new();
192    for (child_name, child) in children {
193        match child {
194            ChildKind::Blob { mode, oid } => tree_entries.push(TreeEntry {
195                mode,
196                name: child_name,
197                oid,
198            }),
199            ChildKind::Tree(sub_prefix, sub_entries) => {
200                let child_node =
201                    build_cache_tree_node(odb, &sub_prefix, child_name.clone(), &sub_entries)?;
202                let oid = child_node.oid.ok_or_else(|| {
203                    crate::error::Error::IndexError("cache-tree child missing oid".to_owned())
204                })?;
205                tree_entries.push(TreeEntry {
206                    mode: MODE_TREE,
207                    name: child_name,
208                    oid,
209                });
210                cache_children.push(child_node);
211            }
212        }
213    }
214
215    tree_entries.sort_by(|a, b| {
216        let a_tree = a.mode == MODE_TREE;
217        let b_tree = b.mode == MODE_TREE;
218        tree_entry_cmp(&a.name, a_tree, &b.name, b_tree)
219    });
220    cache_children.sort_by(|a, b| a.name.cmp(&b.name));
221
222    let data = serialize_tree(&tree_entries);
223    freshen_tree_entries(odb, &tree_entries);
224    let oid = odb.write(ObjectKind::Tree, &data)?;
225    Ok(CacheTreeNode::valid(
226        name,
227        entries.len() as i32,
228        oid,
229        cache_children,
230    ))
231}
232
233/// Build a tree for a **partial** commit: paths listed in `paths_from_index` (repository-relative,
234/// UTF-8 path bytes) are taken from `index`; every other path is copied from `base_tree_oid`
235/// (typically `HEAD^{tree}`).
236///
237/// This matches Git's behaviour when committing with pathspecs while the index contains additional
238/// staged paths: the commit tree merges `HEAD` with only the pathspec-selected index updates.
239pub fn write_tree_partial_from_index(
240    odb: &Odb,
241    index: &Index,
242    base_tree_oid: &ObjectId,
243    paths_from_index: &std::collections::HashSet<Vec<u8>>,
244) -> Result<ObjectId> {
245    let _ = odb.write(ObjectKind::Blob, b"");
246
247    fn full_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
248        if prefix.is_empty() {
249            name.to_vec()
250        } else {
251            let mut p = prefix.to_vec();
252            p.push(b'/');
253            p.extend_from_slice(name);
254            p
255        }
256    }
257
258    fn subtree_affected(paths_from_index: &std::collections::HashSet<Vec<u8>>, dir: &[u8]) -> bool {
259        paths_from_index
260            .iter()
261            .any(|p| p == dir || (p.starts_with(dir) && p.get(dir.len()) == Some(&b'/')))
262    }
263
264    fn index_has_entry_under(index: &Index, dir: &[u8]) -> bool {
265        index.entries.iter().any(|entry| {
266            entry.stage() == 0
267                && !entry.intent_to_add()
268                && entry.mode != MODE_TREE
269                && entry.path.starts_with(dir)
270                && entry.path.get(dir.len()) == Some(&b'/')
271        })
272    }
273
274    fn merge_level(
275        odb: &Odb,
276        index: &Index,
277        base_tree_oid: &ObjectId,
278        prefix: &[u8],
279        paths_from_index: &std::collections::HashSet<Vec<u8>>,
280    ) -> Result<ObjectId> {
281        let base_obj = odb.read(base_tree_oid)?;
282        let base_entries = parse_tree(&base_obj.data)?;
283
284        let mut by_name: BTreeMap<Vec<u8>, TreeEntry> = BTreeMap::new();
285        for te in base_entries {
286            let fp = full_path(prefix, &te.name);
287            if !subtree_affected(paths_from_index, &fp) {
288                by_name.insert(te.name.clone(), te);
289            } else if te.mode == MODE_TREE {
290                if paths_from_index.contains(&fp) && !index_has_entry_under(index, &fp) {
291                    continue;
292                }
293                let sub_oid = merge_level(odb, index, &te.oid, &fp, paths_from_index)?;
294                by_name.insert(
295                    te.name.clone(),
296                    TreeEntry {
297                        mode: MODE_TREE,
298                        name: te.name,
299                        oid: sub_oid,
300                    },
301                );
302            } else if paths_from_index.contains(&fp) {
303                if let Some(ie) = index.entries.iter().find(|e| {
304                    e.stage() == 0 && !e.intent_to_add() && e.mode != MODE_TREE && e.path == fp
305                }) {
306                    by_name.insert(
307                        te.name.clone(),
308                        TreeEntry {
309                            mode: canonicalize_blob_mode(ie.mode),
310                            name: te.name,
311                            oid: ie.oid,
312                        },
313                    );
314                }
315                // No index entry: path was removed — omit from the merged tree.
316            } else {
317                by_name.insert(te.name.clone(), te);
318            }
319        }
320
321        for ie in &index.entries {
322            if ie.stage() != 0 || ie.intent_to_add() || ie.mode == MODE_TREE {
323                continue;
324            }
325            if !paths_from_index.contains(&ie.path) {
326                continue;
327            }
328            let rel = if prefix.is_empty() {
329                ie.path.as_slice()
330            } else if ie.path.starts_with(prefix) && ie.path.get(prefix.len()) == Some(&b'/') {
331                &ie.path[prefix.len() + 1..]
332            } else {
333                continue;
334            };
335            if rel.is_empty() {
336                continue;
337            }
338            if let Some(slash) = rel.iter().position(|&b| b == b'/') {
339                let dir_name = rel[..slash].to_vec();
340                if by_name.contains_key(&dir_name) {
341                    continue;
342                }
343                let sub_prefix = full_path(prefix, &dir_name);
344                let sub_oid =
345                    write_tree_from_index(odb, index, &String::from_utf8_lossy(&sub_prefix))?;
346                by_name.insert(
347                    dir_name.clone(),
348                    TreeEntry {
349                        mode: MODE_TREE,
350                        name: dir_name,
351                        oid: sub_oid,
352                    },
353                );
354            } else {
355                let name = rel.to_vec();
356                if !by_name.contains_key(&name) {
357                    by_name.insert(
358                        name.clone(),
359                        TreeEntry {
360                            mode: canonicalize_blob_mode(ie.mode),
361                            name,
362                            oid: ie.oid,
363                        },
364                    );
365                }
366            }
367        }
368
369        let mut out: Vec<TreeEntry> = by_name.into_values().collect();
370        out.sort_by(|a, b| {
371            let a_tree = a.mode == MODE_TREE;
372            let b_tree = b.mode == MODE_TREE;
373            tree_entry_cmp(&a.name, a_tree, &b.name, b_tree)
374        });
375        let data = serialize_tree(&out);
376        freshen_tree_entries(odb, &out);
377        odb.write(ObjectKind::Tree, &data)
378    }
379
380    merge_level(odb, index, base_tree_oid, b"", paths_from_index)
381}
382
383fn freshen_tree_entries(odb: &Odb, tree_entries: &[TreeEntry]) {
384    for entry in tree_entries {
385        let _ = odb.freshen_object(&entry.oid);
386    }
387}
388
389fn canonicalize_blob_mode(mode: u32) -> u32 {
390    match mode & 0o170000 {
391        0o120000 => MODE_SYMLINK,
392        0o160000 => MODE_GITLINK,
393        0o100000 => {
394            if mode & 0o111 != 0 {
395                MODE_EXECUTABLE
396            } else {
397                MODE_REGULAR
398            }
399        }
400        _ => MODE_REGULAR,
401    }
402}
403
404enum ChildKind<'a> {
405    Blob { mode: u32, oid: ObjectId },
406    Tree(Vec<u8>, Vec<&'a IndexEntry>),
407}
408
409impl<'a> ChildKind<'a> {
410    fn push_entry(&mut self, entry: &'a IndexEntry) {
411        if let Self::Tree(_, entries) = self {
412            entries.push(entry);
413        }
414    }
415}
416
417#[cfg(test)]
418mod tests {
419    #![allow(clippy::expect_used, clippy::unwrap_used)]
420
421    use super::*;
422    use crate::index::{IndexEntry, MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK, MODE_TREE};
423    use crate::objects::parse_tree;
424    use tempfile::TempDir;
425
426    fn entry(path: &str, mode: u32, oid: ObjectId) -> IndexEntry {
427        IndexEntry {
428            ctime_sec: 0,
429            ctime_nsec: 0,
430            mtime_sec: 0,
431            mtime_nsec: 0,
432            dev: 0,
433            ino: 0,
434            mode,
435            uid: 0,
436            gid: 0,
437            size: 0,
438            oid,
439            flags: path.len().min(0xFFF) as u16,
440            flags_extended: None,
441            path: path.as_bytes().to_vec(),
442            base_index_pos: 0,
443        }
444    }
445
446    #[test]
447    fn writes_sorted_tree_with_canonical_modes() {
448        let temp_dir = TempDir::new().unwrap();
449        let odb = Odb::new(temp_dir.path());
450
451        let oid_a = odb.write(ObjectKind::Blob, b"a").unwrap();
452        let oid_exec = odb.write(ObjectKind::Blob, b"exec").unwrap();
453        let oid_link = odb.write(ObjectKind::Blob, b"target").unwrap();
454
455        let mut index = Index::new();
456        index.add_or_replace(entry("bin/run.sh", 0o100777, oid_exec));
457        index.add_or_replace(entry("link", 0o120777, oid_link));
458        index.add_or_replace(entry("a.txt", 0o100664, oid_a));
459
460        let root_oid = write_tree_from_index(&odb, &index, "").unwrap();
461        let root_tree_obj = odb.read(&root_oid).unwrap();
462        let root_entries = parse_tree(&root_tree_obj.data).unwrap();
463
464        assert_eq!(root_entries.len(), 3);
465        assert_eq!(root_entries[0].name, b"a.txt");
466        assert_eq!(root_entries[0].mode, MODE_REGULAR);
467        assert_eq!(root_entries[1].name, b"bin");
468        assert_eq!(root_entries[1].mode, MODE_TREE);
469        assert_eq!(root_entries[2].name, b"link");
470        assert_eq!(root_entries[2].mode, MODE_SYMLINK);
471
472        let bin_tree_obj = odb.read(&root_entries[1].oid).unwrap();
473        let bin_entries = parse_tree(&bin_tree_obj.data).unwrap();
474        assert_eq!(bin_entries.len(), 1);
475        assert_eq!(bin_entries[0].name, b"run.sh");
476        assert_eq!(bin_entries[0].mode, MODE_EXECUTABLE);
477    }
478}