Skip to main content

git_internal/internal/object/
tree.rs

1//! In Git, a tree object is used to represent the state of a directory at a specific point in time.
2//! It stores information about the files and directories within that directory, including their names,
3//! permissions, and the IDs of the objects that represent their contents.
4//!
5//! A tree object can contain other tree objects as well as blob objects, which represent the contents
6//! of individual files. The object IDs of these child objects are stored within the tree object itself.
7//!
8//! When you make a commit in Git, you create a new tree object that represents the state of the
9//! repository at that point in time. The parent of the new commit is typically the tree object
10//! representing the previous state of the repository.
11//!
12//! Git uses the tree object to efficiently store and manage the contents of a repository. By
13//! representing the contents of a directory as a tree object, Git can quickly determine which files
14//! have been added, modified, or deleted between two points in time. This allows Git to perform
15//! operations like merging and rebasing more quickly and accurately.
16//!
17use std::fmt::Display;
18
19use colored::Colorize;
20use encoding_rs::GBK;
21
22use crate::{
23    errors::GitError,
24    hash::{ObjectHash, get_hash_kind},
25    internal::object::{ObjectTrait, ObjectType},
26};
27
28/// In Git, the mode field in a tree object's entry specifies the type of the object represented by
29/// that entry. The mode is a three-digit octal number that encodes both the permissions and the
30/// type of the object. The first digit specifies the object type, and the remaining two digits
31/// specify the file mode or permissions.
32#[derive(
33    PartialEq,
34    Eq,
35    Debug,
36    Clone,
37    Copy,
38    serde::Serialize,
39    serde::Deserialize,
40    Hash,
41    rkyv::Archive,
42    rkyv::Serialize,
43    rkyv::Deserialize,
44)]
45pub enum TreeItemMode {
46    Blob,
47    BlobExecutable,
48    Tree,
49    Commit,
50    Link,
51}
52
53impl Display for TreeItemMode {
54    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
55        let _print = match *self {
56            TreeItemMode::Blob => "blob",
57            TreeItemMode::BlobExecutable => "blob executable",
58            TreeItemMode::Tree => "tree",
59            TreeItemMode::Commit => "commit",
60            TreeItemMode::Link => "link",
61        };
62
63        write!(f, "{}", String::from(_print).blue())
64    }
65}
66
67impl TreeItemMode {
68    /// Convert a 32-bit mode to a TreeItemType
69    ///
70    /// |0100000000000000| (040000)| Directory|
71    /// |1000000110100100| (100644)| Regular non-executable file|
72    /// |1000000110110100| (100664)| Regular non-executable group-writeable file|
73    /// |1000000111101101| (100755)| Regular executable file|
74    /// |1010000000000000| (120000)| Symbolic link|
75    /// |1110000000000000| (160000)| Gitlink|
76    /// ---
77    /// # GitLink
78    /// Gitlink, also known as a submodule, is a feature in Git that allows you to include a Git
79    /// repository as a subdirectory within another Git repository. This is useful when you want to
80    /// incorporate code from another project into your own project, without having to manually copy
81    /// the code into your repository.
82    ///
83    /// When you add a submodule to your Git repository, Git stores a reference to the other
84    /// repository at a specific commit. This means that your repository will always point to a
85    /// specific version of the other repository, even if changes are made to the submodule's code
86    /// in the future.
87    ///
88    /// To work with a submodule in Git, you use commands like git submodule add, git submodule
89    /// update, and git submodule init. These commands allow you to add a submodule to your repository,
90    /// update it to the latest version, and initialize it for use.
91    ///
92    /// Submodules can be a powerful tool for managing dependencies between different projects and
93    /// components. However, they can also add complexity to your workflow, so it's important to
94    /// understand how they work and when to use them.
95    pub fn tree_item_type_from_bytes(mode: &[u8]) -> Result<TreeItemMode, GitError> {
96        Ok(match mode {
97            b"40000" => TreeItemMode::Tree,
98            b"100644" => TreeItemMode::Blob,
99            b"100755" => TreeItemMode::BlobExecutable,
100            b"120000" => TreeItemMode::Link,
101            b"160000" => TreeItemMode::Commit,
102            b"100664" => TreeItemMode::Blob,
103            b"100640" => TreeItemMode::Blob,
104            _ => {
105                return Err(GitError::InvalidTreeItem(
106                    String::from_utf8(mode.to_vec()).unwrap(),
107                ));
108            }
109        })
110    }
111
112    /// 32-bit mode, split into (high to low bits):
113    /// - 4-bit object type: valid values in binary are 1000 (regular file), 1010 (symbolic link) and 1110 (gitlink)
114    /// - 3-bit unused
115    /// - 9-bit unix permission: Only 0755 and 0644 are valid for regular files. Symbolic links and gitlink have value 0 in this field.
116    pub fn to_bytes(self) -> &'static [u8] {
117        match self {
118            TreeItemMode::Blob => b"100644",
119            TreeItemMode::BlobExecutable => b"100755",
120            TreeItemMode::Link => b"120000",
121            TreeItemMode::Tree => b"40000",
122            TreeItemMode::Commit => b"160000",
123        }
124    }
125}
126
127/// A tree object contains a list of entries, one for each file or directory in the tree. Each entry
128/// in the file represents an entry in the tree, and each entry has the following format:
129///
130/// ```bash
131/// <mode> <name>\0<binary object ID>
132/// ```
133/// - `<mode>` is the mode of the object, represented as a six-digit octal number. The first digit
134///   represents the object type (tree, blob, etc.), and the remaining digits represent the file mode or permissions.
135/// - `<name>` is the name of the object.
136/// - `\0` is a null byte separator.
137/// - `<binary object ID>` is the ID of the object that represents the contents of the file or
138///   directory, represented as a binary SHA-1 hash.
139///
140/// # Example
141/// ```bash
142/// 100644 hello-world\0<blob object ID>
143/// 040000 data\0<tree object ID>
144/// ```
145#[derive(
146    PartialEq,
147    Eq,
148    Debug,
149    Clone,
150    serde::Serialize,
151    serde::Deserialize,
152    Hash,
153    rkyv::Archive,
154    rkyv::Serialize,
155    rkyv::Deserialize,
156)]
157pub struct TreeItem {
158    pub mode: TreeItemMode,
159    pub id: ObjectHash,
160    pub name: String,
161}
162
163impl Display for TreeItem {
164    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
165        write!(
166            f,
167            "{} {} {}",
168            self.mode,
169            self.name,
170            self.id.to_string().blue()
171        )
172    }
173}
174
175impl TreeItem {
176    /// Creates a new [`TreeItem`] with the given mode, object ID, and name.
177    pub fn new(mode: TreeItemMode, id: ObjectHash, name: String) -> Self {
178        TreeItem { mode, id, name }
179    }
180
181    /// Create a new TreeItem from a byte vector, split into a mode, id and name, the TreeItem format is:
182    ///
183    /// ```bash
184    /// <mode> <name>\0<binary object ID>
185    /// ```
186    ///
187    pub fn from_bytes(bytes: &[u8]) -> Result<Self, GitError> {
188        let mut parts = bytes.splitn(2, |b| *b == b' ');
189        let mode = parts.next().unwrap();
190        let rest = parts.next().unwrap();
191        let mut parts = rest.splitn(2, |b| *b == b'\0');
192        let raw_name = parts.next().unwrap();
193        let id = parts.next().unwrap();
194
195        let name = if String::from_utf8(raw_name.to_vec()).is_ok() {
196            String::from_utf8(raw_name.to_vec()).unwrap()
197        } else {
198            let (decoded, _, had_errors) = GBK.decode(raw_name);
199            if had_errors {
200                return Err(GitError::InvalidTreeItem(format!(
201                    "Unsupported raw format: {raw_name:?}"
202                )));
203            } else {
204                decoded.to_string()
205            }
206        };
207        Ok(TreeItem {
208            mode: TreeItemMode::tree_item_type_from_bytes(mode)?,
209            id: ObjectHash::from_bytes(id).unwrap(),
210            name,
211        })
212    }
213
214    /// Convert a TreeItem to a byte vector
215    /// ```rust
216    /// use std::str::FromStr;
217    /// use git_internal::internal::object::tree::{TreeItem, TreeItemMode};
218    /// use git_internal::hash::ObjectHash;
219    ///
220    /// let tree_item = TreeItem::new(
221    ///     TreeItemMode::Blob,
222    ///     ObjectHash::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
223    ///     "hello-world".to_string(),
224    /// );
225    ///
226    //  let bytes = tree_item.to_bytes();
227    /// ```
228    pub fn to_data(&self) -> Vec<u8> {
229        let mut bytes = Vec::new();
230
231        bytes.extend_from_slice(self.mode.to_bytes());
232        bytes.push(b' ');
233        bytes.extend_from_slice(self.name.as_bytes());
234        bytes.push(b'\0');
235        bytes.extend_from_slice(&self.id.to_data());
236
237        bytes
238    }
239
240    /// Returns `true` if this item represents a subdirectory (tree).
241    pub fn is_tree(&self) -> bool {
242        self.mode == TreeItemMode::Tree
243    }
244
245    /// Returns `true` if this item represents a regular file (blob).
246    pub fn is_blob(&self) -> bool {
247        self.mode == TreeItemMode::Blob
248    }
249}
250
251/// A tree object is a Git object that represents a directory. It contains a list of entries, one
252/// for each file or directory in the tree.
253#[derive(
254    Eq,
255    Debug,
256    Clone,
257    serde::Serialize,
258    serde::Deserialize,
259    rkyv::Archive,
260    rkyv::Serialize,
261    rkyv::Deserialize,
262)]
263pub struct Tree {
264    pub id: ObjectHash,
265    pub tree_items: Vec<TreeItem>,
266}
267
268impl PartialEq for Tree {
269    fn eq(&self, other: &Self) -> bool {
270        self.id == other.id
271    }
272}
273
274impl Display for Tree {
275    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
276        writeln!(f, "Tree: {}", self.id.to_string().blue())?;
277        for item in &self.tree_items {
278            writeln!(f, "{item}")?;
279        }
280        Ok(())
281    }
282}
283
284impl Tree {
285    /// Create a new Tree from a list of TreeItems
286    pub fn from_tree_items(tree_items: Vec<TreeItem>) -> Result<Self, GitError> {
287        if tree_items.is_empty() {
288            return Err(GitError::EmptyTreeItems(
289                "When export tree object to meta, the items is empty"
290                    .parse()
291                    .unwrap(),
292            ));
293        }
294        let mut data = Vec::new();
295        for item in &tree_items {
296            data.extend_from_slice(item.to_data().as_slice());
297        }
298
299        Ok(Tree {
300            id: ObjectHash::from_type_and_data(ObjectType::Tree, &data),
301            tree_items,
302        })
303    }
304
305    /// After the subdirectory is changed, the hash value of the tree is recalculated.
306    pub fn rehash(&mut self) {
307        let mut data = Vec::new();
308        for item in &self.tree_items {
309            data.extend_from_slice(item.to_data().as_slice());
310        }
311        self.id = ObjectHash::from_type_and_data(ObjectType::Tree, &data);
312    }
313}
314
315impl TryFrom<&[u8]> for Tree {
316    type Error = GitError;
317    fn try_from(data: &[u8]) -> Result<Self, Self::Error> {
318        let h = ObjectHash::from_type_and_data(ObjectType::Tree, data);
319        Tree::from_bytes(data, h)
320    }
321}
322impl ObjectTrait for Tree {
323    fn from_bytes(data: &[u8], hash: ObjectHash) -> Result<Self, GitError>
324    where
325        Self: Sized,
326    {
327        let mut tree_items = Vec::new();
328        let mut i = 0;
329        while i < data.len() {
330            // Find the position of the null byte (0x00)
331            if let Some(index) = memchr::memchr(0x00, &data[i..]) {
332                // Calculate the next position
333                let next = i + index + get_hash_kind().size() + 1; // +1 for the null byte
334                if next > data.len() {
335                    return Err(GitError::InvalidTreeObject);
336                } //check bounds TreeItem::from_bytes will panic if out of bounds
337                // Extract the bytes and create a TreeItem
338                let item_data = &data[i..next];
339                let tree_item = TreeItem::from_bytes(item_data)?;
340
341                tree_items.push(tree_item);
342
343                i = next;
344            } else {
345                // If no null byte is found, return an error
346                return Err(GitError::InvalidTreeObject);
347            }
348        }
349
350        Ok(Tree {
351            id: hash,
352            tree_items,
353        })
354    }
355
356    fn get_type(&self) -> ObjectType {
357        ObjectType::Tree
358    }
359
360    fn get_size(&self) -> usize {
361        self.to_data().map(|data| data.len()).unwrap_or(0)
362    }
363
364    fn to_data(&self) -> Result<Vec<u8>, GitError> {
365        let mut data: Vec<u8> = Vec::new();
366
367        for item in &self.tree_items {
368            data.extend_from_slice(item.to_data().as_slice());
369            //data.push(b'\0');
370        }
371
372        Ok(data)
373    }
374}
375
376#[cfg(test)]
377mod tests {
378
379    use std::str::FromStr;
380
381    use crate::{
382        hash::{HashKind, ObjectHash, set_hash_kind_for_test},
383        internal::object::tree::{Tree, TreeItem, TreeItemMode},
384    };
385
386    /// Helper: roundtrip a single TreeItem under a given hash kind.
387    fn tree_item_round_trip(kind: HashKind, id_hex: &str) {
388        let _guard = set_hash_kind_for_test(kind);
389        let item = TreeItem::new(
390            TreeItemMode::Blob,
391            ObjectHash::from_str(id_hex).unwrap(),
392            "hello-world".to_string(),
393        );
394
395        let bytes = item.to_data();
396        let parsed = TreeItem::from_bytes(&bytes).unwrap();
397
398        assert_eq!(parsed.mode, TreeItemMode::Blob);
399        assert_eq!(parsed.id, item.id);
400        assert_eq!(parsed.name, item.name);
401    }
402
403    /// TreeItem new/to_data/from_bytes roundtrip with SHA-1.
404    #[test]
405    fn tree_item_round_trip_sha1() {
406        tree_item_round_trip(HashKind::Sha1, "8ab686eafeb1f44702738c8b0f24f2567c36da6d");
407    }
408
409    /// TreeItem new/to_data/from_bytes roundtrip with SHA-256.
410    #[test]
411    fn tree_item_round_trip_sha256() {
412        tree_item_round_trip(
413            HashKind::Sha256,
414            "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
415        );
416    }
417
418    /// Helper: build a tree from items and assert the resulting ID.
419    fn tree_round_trip(kind: HashKind, items: Vec<(&str, &str)>, expected_id: &str) {
420        let _guard = set_hash_kind_for_test(kind);
421        let tree_items = items
422            .into_iter()
423            .map(|(name, id_hex)| {
424                TreeItem::new(
425                    TreeItemMode::Blob,
426                    ObjectHash::from_str(id_hex).unwrap(),
427                    name.to_string(),
428                )
429            })
430            .collect::<Vec<_>>();
431        let tree = Tree::from_tree_items(tree_items).unwrap();
432        assert_eq!(tree.id.to_string(), expected_id);
433    }
434
435    /// Tree construction from items (SHA-1).
436    #[test]
437    fn tree_from_items_sha1() {
438        tree_round_trip(
439            HashKind::Sha1,
440            vec![("hello-world", "17288789afffb273c8c394bc65e87d899b92897b")],
441            "cf99336fa61439a2f074c7e6de1c1a05579550e2",
442        );
443    }
444
445    /// Tree construction from items (SHA-256).
446    #[test]
447    fn tree_from_items_sha256() {
448        tree_round_trip(
449            HashKind::Sha256,
450            vec![
451                (
452                    "a.txt",
453                    "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
454                ),
455                (
456                    "b.txt",
457                    "fc2593998f8e1dec9c3a8be11557888134dad90ef5c7a2d6236ed75534c7698e",
458                ),
459                (
460                    "c.txt",
461                    "21513dcb4d6f9eb247db3b4c52158395d94f809cbaa2630bd2a7a474d9b39fab",
462                ),
463                (
464                    "hello-world",
465                    "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
466                ),
467                (
468                    "message.txt",
469                    "9ba9ae56288652bf32f074f922e37d3e95df8920b3cdfc053309595b8f86cbc6",
470                ),
471            ],
472            "d712a36aadfb47cabc7aaa90cf9e515773ba3bfc1fe3783730b387ce15c49261",
473        );
474    }
475}