Skip to main content

grit_lib/
write_tree.rs

1//! Build tree objects from index entries (`git write-tree` core logic).
2
3use std::collections::BTreeMap;
4
5use crate::error::Result;
6use crate::index::{
7    CacheTreeNode, Index, IndexEntry, MODE_EXECUTABLE, MODE_GITLINK, MODE_REGULAR, MODE_SYMLINK,
8    MODE_TREE,
9};
10use crate::objects::{parse_tree, serialize_tree, tree_entry_cmp, ObjectId, ObjectKind, TreeEntry};
11use crate::odb::Odb;
12
13fn ensure_empty_blob_for_intent_to_add(odb: &Odb, index: &Index) -> Result<()> {
14    if index
15        .entries
16        .iter()
17        .any(|e| e.stage() == 0 && e.intent_to_add())
18    {
19        let _ = odb.write(ObjectKind::Blob, b"")?;
20    }
21    Ok(())
22}
23
24/// Build and write tree object(s) from index entries and return the tree OID.
25///
26/// The `prefix` argument optionally limits the write to a subtree path.
27/// Like [`write_tree_from_index`], but only index entries whose path is listed in `paths`
28/// (repository-relative, as stored in the index) are included in the tree.
29pub fn write_tree_from_index_subset(
30    odb: &Odb,
31    index: &Index,
32    paths: &std::collections::HashSet<Vec<u8>>,
33) -> Result<ObjectId> {
34    ensure_empty_blob_for_intent_to_add(odb, index)?;
35
36    let mut entries: Vec<&IndexEntry> = index
37        .entries
38        .iter()
39        .filter(|entry| {
40            entry.stage() == 0
41                && !entry.intent_to_add()
42                && entry.mode != MODE_TREE
43                && paths.contains(&entry.path)
44        })
45        .collect();
46    entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
47    build_tree(odb, &entries, b"")
48}
49
50/// Build and write tree object(s) from index entries and return the tree OID.
51pub fn write_tree_from_index(odb: &Odb, index: &Index, prefix: &str) -> Result<ObjectId> {
52    ensure_empty_blob_for_intent_to_add(odb, index)?;
53
54    let prefix_bytes = prefix.as_bytes();
55    let mut entries: Vec<&IndexEntry> = index
56        .entries
57        .iter()
58        .filter(|entry| {
59            entry.stage() == 0
60                && !entry.intent_to_add()
61                && entry.mode != MODE_TREE
62                && entry.path.starts_with(prefix_bytes)
63        })
64        .collect();
65    entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
66    build_tree(odb, &entries, prefix_bytes)
67}
68
69/// Build a valid cache-tree extension from the index and write any missing tree objects.
70///
71/// # Errors
72///
73/// Returns an error if tree object creation fails.
74pub fn build_cache_tree_from_index(odb: &Odb, index: &Index) -> Result<CacheTreeNode> {
75    ensure_empty_blob_for_intent_to_add(odb, index)?;
76    let mut entries: Vec<&IndexEntry> = index
77        .entries
78        .iter()
79        .filter(|entry| entry.stage() == 0 && !entry.intent_to_add() && entry.mode != MODE_TREE)
80        .collect();
81    entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
82    build_cache_tree_node(odb, b"", Vec::new(), &entries)
83}
84
85/// Build a cache-tree directly from a **tree object**, preserving Git's raw `entry_count`
86/// semantics. Unlike [`build_cache_tree_from_index`], the per-node `entry_count` is the recursive
87/// number of non-tree entries *as recorded in the tree* — duplicate path entries are counted
88/// separately, exactly as upstream Git's cache-tree does after reading such a tree into the index.
89///
90/// This is the cache-tree real Git attaches after `read-tree`/`reset`/`checkout` populate the index
91/// from a tree. For a tree with duplicate entries (`t4058-diff-duplicates`) the resulting
92/// `entry_count` exceeds the number of (deduplicated) index entries, so [`verify_cache_tree`] later
93/// reports "corrupted cache-tree has entries not present in index".
94pub fn build_cache_tree_from_tree(odb: &Odb, tree_oid: &ObjectId) -> Result<CacheTreeNode> {
95    build_cache_tree_from_tree_named(odb, tree_oid, Vec::new())
96}
97
98fn build_cache_tree_from_tree_named(
99    odb: &Odb,
100    tree_oid: &ObjectId,
101    name: Vec<u8>,
102) -> Result<CacheTreeNode> {
103    let obj = odb.read(tree_oid)?;
104    let entries = parse_tree(&obj.data)?;
105
106    let mut entry_count: i32 = 0;
107    let mut children: Vec<CacheTreeNode> = Vec::new();
108    for te in entries {
109        if te.mode == MODE_TREE {
110            let child = build_cache_tree_from_tree_named(odb, &te.oid, te.name.clone())?;
111            entry_count = entry_count.saturating_add(child.entry_count.max(0));
112            children.push(child);
113        } else {
114            entry_count = entry_count.saturating_add(1);
115        }
116    }
117    // Cache-tree children are stored sorted by name (plain byte order, no tree/dir suffix rule).
118    children.sort_by(|a, b| a.name.cmp(&b.name));
119    Ok(CacheTreeNode::valid(name, entry_count, *tree_oid, children))
120}
121
122/// Walk a cache-tree against `index`, mirroring Git's `verify_one` / `cache_tree_verify`.
123///
124/// Returns an error describing the first inconsistency. The only condition exercised by
125/// `t4058-diff-duplicates` is a node whose `entry_count` runs past the end of the index
126/// (`entry_count + pos > cache_nr`), which yields the exact upstream message
127/// "corrupted cache-tree has entries not present in index".
128///
129/// This is gated by callers behind `GIT_TEST_CHECK_CACHE_TREE`, matching upstream's
130/// `write_locked_index`.
131pub fn verify_cache_tree(index: &Index) -> Result<()> {
132    let Some(root) = index.cache_tree.as_ref() else {
133        return Ok(());
134    };
135    // Stage-0, non-tree entries in canonical (path) order — the layout the cache-tree indexes.
136    let mut cache: Vec<&IndexEntry> = index
137        .entries
138        .iter()
139        .filter(|e| e.stage() == 0 && e.mode != MODE_TREE)
140        .collect();
141    cache.sort_by(|a, b| a.path.cmp(&b.path));
142    verify_cache_tree_one(root, &cache, &mut Vec::new())
143}
144
145fn verify_cache_tree_one(
146    node: &CacheTreeNode,
147    cache: &[&IndexEntry],
148    path: &mut Vec<u8>,
149) -> Result<()> {
150    let len = path.len();
151    for child in &node.children {
152        path.extend_from_slice(&child.name);
153        path.push(b'/');
154        verify_cache_tree_one(child, cache, path)?;
155        path.truncate(len);
156    }
157
158    if node.entry_count < 0 {
159        return Ok(());
160    }
161
162    // Position of the first cache entry whose path is at/under this node's directory prefix.
163    let pos = match cache.binary_search_by(|e| e.path.as_slice().cmp(path.as_slice())) {
164        Ok(p) => p,
165        Err(p) => p,
166    };
167
168    if (node.entry_count as usize).saturating_add(pos) > cache.len() {
169        return Err(crate::error::Error::CacheTreeCorrupt);
170    }
171    Ok(())
172}
173
174fn build_tree(odb: &Odb, entries: &[&IndexEntry], dir_prefix: &[u8]) -> Result<ObjectId> {
175    let mut children: BTreeMap<Vec<u8>, ChildKind> = BTreeMap::new();
176
177    for entry in entries {
178        let path = &entry.path;
179        let rel = if dir_prefix.is_empty() {
180            path.as_slice()
181        } else {
182            path.strip_prefix(dir_prefix)
183                .and_then(|suffix| suffix.strip_prefix(b"/"))
184                .unwrap_or(path.as_slice())
185        };
186
187        if let Some(slash_pos) = rel.iter().position(|&byte| byte == b'/') {
188            let child_name = rel[..slash_pos].to_vec();
189            let sub_prefix = if dir_prefix.is_empty() {
190                child_name.clone()
191            } else {
192                let mut sub_prefix = dir_prefix.to_vec();
193                sub_prefix.push(b'/');
194                sub_prefix.extend_from_slice(&child_name);
195                sub_prefix
196            };
197            children
198                .entry(child_name)
199                .or_insert_with(|| ChildKind::Tree(sub_prefix, Vec::new()))
200                .push_entry(entry);
201        } else {
202            children
203                .entry(rel.to_vec())
204                .or_insert_with(|| ChildKind::Blob {
205                    mode: canonicalize_blob_mode(entry.mode),
206                    oid: entry.oid,
207                });
208        }
209    }
210
211    let mut tree_entries = Vec::with_capacity(children.len());
212    for (name, child) in children {
213        match child {
214            ChildKind::Blob { mode, oid } => tree_entries.push(TreeEntry { mode, name, oid }),
215            ChildKind::Tree(sub_prefix, sub_entries) => {
216                let sub_oid = build_tree(odb, &sub_entries, &sub_prefix)?;
217                tree_entries.push(TreeEntry {
218                    mode: MODE_TREE,
219                    name,
220                    oid: sub_oid,
221                });
222            }
223        }
224    }
225
226    tree_entries.sort_by(|a, b| {
227        let a_tree = a.mode == MODE_TREE;
228        let b_tree = b.mode == MODE_TREE;
229        tree_entry_cmp(&a.name, a_tree, &b.name, b_tree)
230    });
231
232    let data = serialize_tree(&tree_entries);
233    freshen_tree_entries(odb, &tree_entries);
234    odb.write(ObjectKind::Tree, &data)
235}
236
237fn build_cache_tree_node(
238    odb: &Odb,
239    dir_prefix: &[u8],
240    name: Vec<u8>,
241    entries: &[&IndexEntry],
242) -> Result<CacheTreeNode> {
243    let mut children: BTreeMap<Vec<u8>, ChildKind> = BTreeMap::new();
244
245    for entry in entries {
246        let path = &entry.path;
247        let rel = if dir_prefix.is_empty() {
248            path.as_slice()
249        } else {
250            path.strip_prefix(dir_prefix)
251                .and_then(|suffix| suffix.strip_prefix(b"/"))
252                .unwrap_or(path.as_slice())
253        };
254
255        if let Some(slash_pos) = rel.iter().position(|&byte| byte == b'/') {
256            let child_name = rel[..slash_pos].to_vec();
257            let sub_prefix = if dir_prefix.is_empty() {
258                child_name.clone()
259            } else {
260                let mut sub_prefix = dir_prefix.to_vec();
261                sub_prefix.push(b'/');
262                sub_prefix.extend_from_slice(&child_name);
263                sub_prefix
264            };
265            children
266                .entry(child_name)
267                .or_insert_with(|| ChildKind::Tree(sub_prefix, Vec::new()))
268                .push_entry(entry);
269        } else {
270            children
271                .entry(rel.to_vec())
272                .or_insert_with(|| ChildKind::Blob {
273                    mode: canonicalize_blob_mode(entry.mode),
274                    oid: entry.oid,
275                });
276        }
277    }
278
279    let mut tree_entries = Vec::with_capacity(children.len());
280    let mut cache_children = Vec::new();
281    for (child_name, child) in children {
282        match child {
283            ChildKind::Blob { mode, oid } => tree_entries.push(TreeEntry {
284                mode,
285                name: child_name,
286                oid,
287            }),
288            ChildKind::Tree(sub_prefix, sub_entries) => {
289                let child_node =
290                    build_cache_tree_node(odb, &sub_prefix, child_name.clone(), &sub_entries)?;
291                let oid = child_node.oid.ok_or_else(|| {
292                    crate::error::Error::IndexError("cache-tree child missing oid".to_owned())
293                })?;
294                tree_entries.push(TreeEntry {
295                    mode: MODE_TREE,
296                    name: child_name,
297                    oid,
298                });
299                cache_children.push(child_node);
300            }
301        }
302    }
303
304    tree_entries.sort_by(|a, b| {
305        let a_tree = a.mode == MODE_TREE;
306        let b_tree = b.mode == MODE_TREE;
307        tree_entry_cmp(&a.name, a_tree, &b.name, b_tree)
308    });
309    cache_children.sort_by(|a, b| a.name.cmp(&b.name));
310
311    let data = serialize_tree(&tree_entries);
312    freshen_tree_entries(odb, &tree_entries);
313    let oid = odb.write(ObjectKind::Tree, &data)?;
314    Ok(CacheTreeNode::valid(
315        name,
316        entries.len() as i32,
317        oid,
318        cache_children,
319    ))
320}
321
322/// Build a tree for a **partial** commit: paths listed in `paths_from_index` (repository-relative,
323/// UTF-8 path bytes) are taken from `index`; every other path is copied from `base_tree_oid`
324/// (typically `HEAD^{tree}`).
325///
326/// This matches Git's behaviour when committing with pathspecs while the index contains additional
327/// staged paths: the commit tree merges `HEAD` with only the pathspec-selected index updates.
328pub fn write_tree_partial_from_index(
329    odb: &Odb,
330    index: &Index,
331    base_tree_oid: &ObjectId,
332    paths_from_index: &std::collections::HashSet<Vec<u8>>,
333) -> Result<ObjectId> {
334    let _ = odb.write(ObjectKind::Blob, b"");
335
336    fn full_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
337        if prefix.is_empty() {
338            name.to_vec()
339        } else {
340            let mut p = prefix.to_vec();
341            p.push(b'/');
342            p.extend_from_slice(name);
343            p
344        }
345    }
346
347    fn subtree_affected(paths_from_index: &std::collections::HashSet<Vec<u8>>, dir: &[u8]) -> bool {
348        paths_from_index
349            .iter()
350            .any(|p| p == dir || (p.starts_with(dir) && p.get(dir.len()) == Some(&b'/')))
351    }
352
353    fn index_has_entry_under(index: &Index, dir: &[u8]) -> bool {
354        index.entries.iter().any(|entry| {
355            entry.stage() == 0
356                && !entry.intent_to_add()
357                && entry.mode != MODE_TREE
358                && entry.path.starts_with(dir)
359                && entry.path.get(dir.len()) == Some(&b'/')
360        })
361    }
362
363    fn merge_level(
364        odb: &Odb,
365        index: &Index,
366        base_tree_oid: &ObjectId,
367        prefix: &[u8],
368        paths_from_index: &std::collections::HashSet<Vec<u8>>,
369    ) -> Result<ObjectId> {
370        let base_obj = odb.read(base_tree_oid)?;
371        let base_entries = parse_tree(&base_obj.data)?;
372
373        let mut by_name: BTreeMap<Vec<u8>, TreeEntry> = BTreeMap::new();
374        for te in base_entries {
375            let fp = full_path(prefix, &te.name);
376            if !subtree_affected(paths_from_index, &fp) {
377                by_name.insert(te.name.clone(), te);
378            } else if te.mode == MODE_TREE {
379                if paths_from_index.contains(&fp) && !index_has_entry_under(index, &fp) {
380                    continue;
381                }
382                let sub_oid = merge_level(odb, index, &te.oid, &fp, paths_from_index)?;
383                by_name.insert(
384                    te.name.clone(),
385                    TreeEntry {
386                        mode: MODE_TREE,
387                        name: te.name,
388                        oid: sub_oid,
389                    },
390                );
391            } else if paths_from_index.contains(&fp) {
392                if let Some(ie) = index.entries.iter().find(|e| {
393                    e.stage() == 0 && !e.intent_to_add() && e.mode != MODE_TREE && e.path == fp
394                }) {
395                    by_name.insert(
396                        te.name.clone(),
397                        TreeEntry {
398                            mode: canonicalize_blob_mode(ie.mode),
399                            name: te.name,
400                            oid: ie.oid,
401                        },
402                    );
403                }
404                // No index entry: path was removed — omit from the merged tree.
405            } else {
406                by_name.insert(te.name.clone(), te);
407            }
408        }
409
410        for ie in &index.entries {
411            if ie.stage() != 0 || ie.intent_to_add() || ie.mode == MODE_TREE {
412                continue;
413            }
414            if !paths_from_index.contains(&ie.path) {
415                continue;
416            }
417            let rel = if prefix.is_empty() {
418                ie.path.as_slice()
419            } else if ie.path.starts_with(prefix) && ie.path.get(prefix.len()) == Some(&b'/') {
420                &ie.path[prefix.len() + 1..]
421            } else {
422                continue;
423            };
424            if rel.is_empty() {
425                continue;
426            }
427            if let Some(slash) = rel.iter().position(|&b| b == b'/') {
428                let dir_name = rel[..slash].to_vec();
429                if by_name.contains_key(&dir_name) {
430                    continue;
431                }
432                let sub_prefix = full_path(prefix, &dir_name);
433                let sub_oid =
434                    write_tree_from_index(odb, index, &String::from_utf8_lossy(&sub_prefix))?;
435                by_name.insert(
436                    dir_name.clone(),
437                    TreeEntry {
438                        mode: MODE_TREE,
439                        name: dir_name,
440                        oid: sub_oid,
441                    },
442                );
443            } else {
444                let name = rel.to_vec();
445                if !by_name.contains_key(&name) {
446                    by_name.insert(
447                        name.clone(),
448                        TreeEntry {
449                            mode: canonicalize_blob_mode(ie.mode),
450                            name,
451                            oid: ie.oid,
452                        },
453                    );
454                }
455            }
456        }
457
458        let mut out: Vec<TreeEntry> = by_name.into_values().collect();
459        out.sort_by(|a, b| {
460            let a_tree = a.mode == MODE_TREE;
461            let b_tree = b.mode == MODE_TREE;
462            tree_entry_cmp(&a.name, a_tree, &b.name, b_tree)
463        });
464        let data = serialize_tree(&out);
465        freshen_tree_entries(odb, &out);
466        odb.write(ObjectKind::Tree, &data)
467    }
468
469    merge_level(odb, index, base_tree_oid, b"", paths_from_index)
470}
471
472fn freshen_tree_entries(odb: &Odb, tree_entries: &[TreeEntry]) {
473    for entry in tree_entries {
474        let _ = odb.freshen_object(&entry.oid);
475    }
476}
477
478fn canonicalize_blob_mode(mode: u32) -> u32 {
479    match mode & 0o170000 {
480        0o120000 => MODE_SYMLINK,
481        0o160000 => MODE_GITLINK,
482        0o100000 => {
483            if mode & 0o111 != 0 {
484                MODE_EXECUTABLE
485            } else {
486                MODE_REGULAR
487            }
488        }
489        _ => MODE_REGULAR,
490    }
491}
492
493enum ChildKind<'a> {
494    Blob { mode: u32, oid: ObjectId },
495    Tree(Vec<u8>, Vec<&'a IndexEntry>),
496}
497
498impl<'a> ChildKind<'a> {
499    fn push_entry(&mut self, entry: &'a IndexEntry) {
500        if let Self::Tree(_, entries) = self {
501            entries.push(entry);
502        }
503    }
504}
505
506#[cfg(test)]
507mod tests {
508    #![allow(clippy::expect_used, clippy::unwrap_used)]
509
510    use super::*;
511    use crate::index::{IndexEntry, MODE_EXECUTABLE, MODE_REGULAR, MODE_SYMLINK, MODE_TREE};
512    use crate::objects::parse_tree;
513    use tempfile::TempDir;
514
515    fn entry(path: &str, mode: u32, oid: ObjectId) -> IndexEntry {
516        IndexEntry {
517            ctime_sec: 0,
518            ctime_nsec: 0,
519            mtime_sec: 0,
520            mtime_nsec: 0,
521            dev: 0,
522            ino: 0,
523            mode,
524            uid: 0,
525            gid: 0,
526            size: 0,
527            oid,
528            flags: path.len().min(0xFFF) as u16,
529            flags_extended: None,
530            path: path.as_bytes().to_vec(),
531            base_index_pos: 0,
532        }
533    }
534
535    #[test]
536    fn writes_sorted_tree_with_canonical_modes() {
537        let temp_dir = TempDir::new().unwrap();
538        let odb = Odb::new(temp_dir.path());
539
540        let oid_a = odb.write(ObjectKind::Blob, b"a").unwrap();
541        let oid_exec = odb.write(ObjectKind::Blob, b"exec").unwrap();
542        let oid_link = odb.write(ObjectKind::Blob, b"target").unwrap();
543
544        let mut index = Index::new();
545        index.add_or_replace(entry("bin/run.sh", 0o100777, oid_exec));
546        index.add_or_replace(entry("link", 0o120777, oid_link));
547        index.add_or_replace(entry("a.txt", 0o100664, oid_a));
548
549        let root_oid = write_tree_from_index(&odb, &index, "").unwrap();
550        let root_tree_obj = odb.read(&root_oid).unwrap();
551        let root_entries = parse_tree(&root_tree_obj.data).unwrap();
552
553        assert_eq!(root_entries.len(), 3);
554        assert_eq!(root_entries[0].name, b"a.txt");
555        assert_eq!(root_entries[0].mode, MODE_REGULAR);
556        assert_eq!(root_entries[1].name, b"bin");
557        assert_eq!(root_entries[1].mode, MODE_TREE);
558        assert_eq!(root_entries[2].name, b"link");
559        assert_eq!(root_entries[2].mode, MODE_SYMLINK);
560
561        let bin_tree_obj = odb.read(&root_entries[1].oid).unwrap();
562        let bin_entries = parse_tree(&bin_tree_obj.data).unwrap();
563        assert_eq!(bin_entries.len(), 1);
564        assert_eq!(bin_entries[0].name, b"run.sh");
565        assert_eq!(bin_entries[0].mode, MODE_EXECUTABLE);
566    }
567}