git_internal/internal/object/
tree.rs

1//! In Git, a tree object is used to represent the state of a directory at a specific point in time.
2//! It stores information about the files and directories within that directory, including their names,
3//! permissions, and the IDs of the objects that represent their contents.
4//!
5//! A tree object can contain other tree objects as well as blob objects, which represent the contents
6//! of individual files. The object IDs of these child objects are stored within the tree object itself.
7//!
8//! When you make a commit in Git, you create a new tree object that represents the state of the
9//! repository at that point in time. The parent of the new commit is typically the tree object
10//! representing the previous state of the repository.
11//!
12//! Git uses the tree object to efficiently store and manage the contents of a repository. By
13//! representing the contents of a directory as a tree object, Git can quickly determine which files
14//! have been added, modified, or deleted between two points in time. This allows Git to perform
15//! operations like merging and rebasing more quickly and accurately.
16//!
17use std::fmt::Display;
18
19use bincode::{Decode, Encode};
20use colored::Colorize;
21use encoding_rs::GBK;
22use serde::{Deserialize, Serialize};
23
24use crate::{
25    errors::GitError,
26    hash::{ObjectHash, get_hash_kind},
27    internal::object::{ObjectTrait, ObjectType},
28};
29
30/// In Git, the mode field in a tree object's entry specifies the type of the object represented by
31/// that entry. The mode is a three-digit octal number that encodes both the permissions and the
32/// type of the object. The first digit specifies the object type, and the remaining two digits
33/// specify the file mode or permissions.
34#[derive(PartialEq, Eq, Debug, Clone, Copy, Serialize, Deserialize, Hash, Encode, Decode)]
35pub enum TreeItemMode {
36    Blob,
37    BlobExecutable,
38    Tree,
39    Commit,
40    Link,
41}
42
43impl Display for TreeItemMode {
44    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
45        let _print = match *self {
46            TreeItemMode::Blob => "blob",
47            TreeItemMode::BlobExecutable => "blob executable",
48            TreeItemMode::Tree => "tree",
49            TreeItemMode::Commit => "commit",
50            TreeItemMode::Link => "link",
51        };
52
53        write!(f, "{}", String::from(_print).blue())
54    }
55}
56
57impl TreeItemMode {
58    /// Convert a 32-bit mode to a TreeItemType
59    ///
60    /// |0100000000000000| (040000)| Directory|
61    /// |1000000110100100| (100644)| Regular non-executable file|
62    /// |1000000110110100| (100664)| Regular non-executable group-writeable file|
63    /// |1000000111101101| (100755)| Regular executable file|
64    /// |1010000000000000| (120000)| Symbolic link|
65    /// |1110000000000000| (160000)| Gitlink|
66    /// ---
67    /// # GitLink
68    /// Gitlink, also known as a submodule, is a feature in Git that allows you to include a Git
69    /// repository as a subdirectory within another Git repository. This is useful when you want to
70    /// incorporate code from another project into your own project, without having to manually copy
71    /// the code into your repository.
72    ///
73    /// When you add a submodule to your Git repository, Git stores a reference to the other
74    /// repository at a specific commit. This means that your repository will always point to a
75    /// specific version of the other repository, even if changes are made to the submodule's code
76    /// in the future.
77    ///
78    /// To work with a submodule in Git, you use commands like git submodule add, git submodule
79    /// update, and git submodule init. These commands allow you to add a submodule to your repository,
80    /// update it to the latest version, and initialize it for use.
81    ///
82    /// Submodules can be a powerful tool for managing dependencies between different projects and
83    /// components. However, they can also add complexity to your workflow, so it's important to
84    /// understand how they work and when to use them.
85    pub fn tree_item_type_from_bytes(mode: &[u8]) -> Result<TreeItemMode, GitError> {
86        Ok(match mode {
87            b"40000" => TreeItemMode::Tree,
88            b"100644" => TreeItemMode::Blob,
89            b"100755" => TreeItemMode::BlobExecutable,
90            b"120000" => TreeItemMode::Link,
91            b"160000" => TreeItemMode::Commit,
92            b"100664" => TreeItemMode::Blob,
93            b"100640" => TreeItemMode::Blob,
94            _ => {
95                return Err(GitError::InvalidTreeItem(
96                    String::from_utf8(mode.to_vec()).unwrap(),
97                ));
98            }
99        })
100    }
101
102    /// 32-bit mode, split into (high to low bits):
103    /// - 4-bit object type: valid values in binary are 1000 (regular file), 1010 (symbolic link) and 1110 (gitlink)
104    /// - 3-bit unused
105    /// - 9-bit unix permission: Only 0755 and 0644 are valid for regular files. Symbolic links and gitlink have value 0 in this field.
106    pub fn to_bytes(self) -> &'static [u8] {
107        match self {
108            TreeItemMode::Blob => b"100644",
109            TreeItemMode::BlobExecutable => b"100755",
110            TreeItemMode::Link => b"120000",
111            TreeItemMode::Tree => b"40000",
112            TreeItemMode::Commit => b"160000",
113        }
114    }
115}
116
117/// A tree object contains a list of entries, one for each file or directory in the tree. Each entry
118/// in the file represents an entry in the tree, and each entry has the following format:
119///
120/// ```bash
121/// <mode> <name>\0<binary object ID>
122/// ```
123/// - `<mode>` is the mode of the object, represented as a six-digit octal number. The first digit
124///   represents the object type (tree, blob, etc.), and the remaining digits represent the file mode or permissions.
125/// - `<name>` is the name of the object.
126/// - `\0` is a null byte separator.
127/// - `<binary object ID>` is the ID of the object that represents the contents of the file or
128///   directory, represented as a binary SHA-1 hash.
129///
130/// # Example
131/// ```bash
132/// 100644 hello-world\0<blob object ID>
133/// 040000 data\0<tree object ID>
134/// ```
135#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Hash, Encode, Decode)]
136pub struct TreeItem {
137    pub mode: TreeItemMode,
138    pub id: ObjectHash,
139    pub name: String,
140}
141
142impl Display for TreeItem {
143    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
144        write!(
145            f,
146            "{} {} {}",
147            self.mode,
148            self.name,
149            self.id.to_string().blue()
150        )
151    }
152}
153
154impl TreeItem {
155    /// Creates a new [`TreeItem`] with the given mode, object ID, and name.
156    pub fn new(mode: TreeItemMode, id: ObjectHash, name: String) -> Self {
157        TreeItem { mode, id, name }
158    }
159
160    /// Create a new TreeItem from a byte vector, split into a mode, id and name, the TreeItem format is:
161    ///
162    /// ```bash
163    /// <mode> <name>\0<binary object ID>
164    /// ```
165    ///
166    pub fn from_bytes(bytes: &[u8]) -> Result<Self, GitError> {
167        let mut parts = bytes.splitn(2, |b| *b == b' ');
168        let mode = parts.next().unwrap();
169        let rest = parts.next().unwrap();
170        let mut parts = rest.splitn(2, |b| *b == b'\0');
171        let raw_name = parts.next().unwrap();
172        let id = parts.next().unwrap();
173
174        let name = if String::from_utf8(raw_name.to_vec()).is_ok() {
175            String::from_utf8(raw_name.to_vec()).unwrap()
176        } else {
177            let (decoded, _, had_errors) = GBK.decode(raw_name);
178            if had_errors {
179                return Err(GitError::InvalidTreeItem(format!(
180                    "Unsupported raw format: {raw_name:?}"
181                )));
182            } else {
183                decoded.to_string()
184            }
185        };
186        Ok(TreeItem {
187            mode: TreeItemMode::tree_item_type_from_bytes(mode)?,
188            id: ObjectHash::from_bytes(id).unwrap(),
189            name,
190        })
191    }
192
193    /// Convert a TreeItem to a byte vector
194    /// ```rust
195    /// use std::str::FromStr;
196    /// use git_internal::internal::object::tree::{TreeItem, TreeItemMode};
197    /// use git_internal::hash::ObjectHash;
198    ///
199    /// let tree_item = TreeItem::new(
200    ///     TreeItemMode::Blob,
201    ///     ObjectHash::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
202    ///     "hello-world".to_string(),
203    /// );
204    ///
205    //  let bytes = tree_item.to_bytes();
206    /// ```
207    pub fn to_data(&self) -> Vec<u8> {
208        let mut bytes = Vec::new();
209
210        bytes.extend_from_slice(self.mode.to_bytes());
211        bytes.push(b' ');
212        bytes.extend_from_slice(self.name.as_bytes());
213        bytes.push(b'\0');
214        bytes.extend_from_slice(&self.id.to_data());
215
216        bytes
217    }
218
219    /// Returns `true` if this item represents a subdirectory (tree).
220    pub fn is_tree(&self) -> bool {
221        self.mode == TreeItemMode::Tree
222    }
223
224    /// Returns `true` if this item represents a regular file (blob).
225    pub fn is_blob(&self) -> bool {
226        self.mode == TreeItemMode::Blob
227    }
228}
229
230/// A tree object is a Git object that represents a directory. It contains a list of entries, one
231/// for each file or directory in the tree.
232#[derive(Eq, Debug, Clone, Serialize, Deserialize, Encode, Decode)]
233pub struct Tree {
234    pub id: ObjectHash,
235    pub tree_items: Vec<TreeItem>,
236}
237
238impl PartialEq for Tree {
239    fn eq(&self, other: &Self) -> bool {
240        self.id == other.id
241    }
242}
243
244impl Display for Tree {
245    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
246        writeln!(f, "Tree: {}", self.id.to_string().blue())?;
247        for item in &self.tree_items {
248            writeln!(f, "{item}")?;
249        }
250        Ok(())
251    }
252}
253
254impl Tree {
255    /// Create a new Tree from a list of TreeItems
256    pub fn from_tree_items(tree_items: Vec<TreeItem>) -> Result<Self, GitError> {
257        if tree_items.is_empty() {
258            return Err(GitError::EmptyTreeItems(
259                "When export tree object to meta, the items is empty"
260                    .parse()
261                    .unwrap(),
262            ));
263        }
264        let mut data = Vec::new();
265        for item in &tree_items {
266            data.extend_from_slice(item.to_data().as_slice());
267        }
268
269        Ok(Tree {
270            id: ObjectHash::from_type_and_data(ObjectType::Tree, &data),
271            tree_items,
272        })
273    }
274
275    /// After the subdirectory is changed, the hash value of the tree is recalculated.
276    pub fn rehash(&mut self) {
277        let mut data = Vec::new();
278        for item in &self.tree_items {
279            data.extend_from_slice(item.to_data().as_slice());
280        }
281        self.id = ObjectHash::from_type_and_data(ObjectType::Tree, &data);
282    }
283}
284
285impl TryFrom<&[u8]> for Tree {
286    type Error = GitError;
287    fn try_from(data: &[u8]) -> Result<Self, Self::Error> {
288        let h = ObjectHash::from_type_and_data(ObjectType::Tree, data);
289        Tree::from_bytes(data, h)
290    }
291}
292impl ObjectTrait for Tree {
293    fn from_bytes(data: &[u8], hash: ObjectHash) -> Result<Self, GitError>
294    where
295        Self: Sized,
296    {
297        let mut tree_items = Vec::new();
298        let mut i = 0;
299        while i < data.len() {
300            // Find the position of the null byte (0x00)
301            if let Some(index) = memchr::memchr(0x00, &data[i..]) {
302                // Calculate the next position
303                let next = i + index + get_hash_kind().size() + 1; // +1 for the null byte
304                if next > data.len() {
305                    return Err(GitError::InvalidTreeObject);
306                } //check bounds TreeItem::from_bytes will panic if out of bounds
307                // Extract the bytes and create a TreeItem
308                let item_data = &data[i..next];
309                let tree_item = TreeItem::from_bytes(item_data)?;
310
311                tree_items.push(tree_item);
312
313                i = next;
314            } else {
315                // If no null byte is found, return an error
316                return Err(GitError::InvalidTreeObject);
317            }
318        }
319
320        Ok(Tree {
321            id: hash,
322            tree_items,
323        })
324    }
325
326    fn get_type(&self) -> ObjectType {
327        ObjectType::Tree
328    }
329
330    fn get_size(&self) -> usize {
331        todo!()
332    }
333
334    fn to_data(&self) -> Result<Vec<u8>, GitError> {
335        let mut data: Vec<u8> = Vec::new();
336
337        for item in &self.tree_items {
338            data.extend_from_slice(item.to_data().as_slice());
339            //data.push(b'\0');
340        }
341
342        Ok(data)
343    }
344}
345
346#[cfg(test)]
347mod tests {
348
349    use std::str::FromStr;
350
351    use crate::{
352        hash::{HashKind, ObjectHash, set_hash_kind_for_test},
353        internal::object::tree::{Tree, TreeItem, TreeItemMode},
354    };
355
356    /// Helper: roundtrip a single TreeItem under a given hash kind.
357    fn tree_item_round_trip(kind: HashKind, id_hex: &str) {
358        let _guard = set_hash_kind_for_test(kind);
359        let item = TreeItem::new(
360            TreeItemMode::Blob,
361            ObjectHash::from_str(id_hex).unwrap(),
362            "hello-world".to_string(),
363        );
364
365        let bytes = item.to_data();
366        let parsed = TreeItem::from_bytes(&bytes).unwrap();
367
368        assert_eq!(parsed.mode, TreeItemMode::Blob);
369        assert_eq!(parsed.id, item.id);
370        assert_eq!(parsed.name, item.name);
371    }
372
373    /// TreeItem new/to_data/from_bytes roundtrip with SHA-1.
374    #[test]
375    fn tree_item_round_trip_sha1() {
376        tree_item_round_trip(HashKind::Sha1, "8ab686eafeb1f44702738c8b0f24f2567c36da6d");
377    }
378
379    /// TreeItem new/to_data/from_bytes roundtrip with SHA-256.
380    #[test]
381    fn tree_item_round_trip_sha256() {
382        tree_item_round_trip(
383            HashKind::Sha256,
384            "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
385        );
386    }
387
388    /// Helper: build a tree from items and assert the resulting ID.
389    fn tree_round_trip(kind: HashKind, items: Vec<(&str, &str)>, expected_id: &str) {
390        let _guard = set_hash_kind_for_test(kind);
391        let tree_items = items
392            .into_iter()
393            .map(|(name, id_hex)| {
394                TreeItem::new(
395                    TreeItemMode::Blob,
396                    ObjectHash::from_str(id_hex).unwrap(),
397                    name.to_string(),
398                )
399            })
400            .collect::<Vec<_>>();
401        let tree = Tree::from_tree_items(tree_items).unwrap();
402        assert_eq!(tree.id.to_string(), expected_id);
403    }
404
405    /// Tree construction from items (SHA-1).
406    #[test]
407    fn tree_from_items_sha1() {
408        tree_round_trip(
409            HashKind::Sha1,
410            vec![("hello-world", "17288789afffb273c8c394bc65e87d899b92897b")],
411            "cf99336fa61439a2f074c7e6de1c1a05579550e2",
412        );
413    }
414
415    /// Tree construction from items (SHA-256).
416    #[test]
417    fn tree_from_items_sha256() {
418        tree_round_trip(
419            HashKind::Sha256,
420            vec![
421                (
422                    "a.txt",
423                    "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
424                ),
425                (
426                    "b.txt",
427                    "fc2593998f8e1dec9c3a8be11557888134dad90ef5c7a2d6236ed75534c7698e",
428                ),
429                (
430                    "c.txt",
431                    "21513dcb4d6f9eb247db3b4c52158395d94f809cbaa2630bd2a7a474d9b39fab",
432                ),
433                (
434                    "hello-world",
435                    "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
436                ),
437                (
438                    "message.txt",
439                    "9ba9ae56288652bf32f074f922e37d3e95df8920b3cdfc053309595b8f86cbc6",
440                ),
441            ],
442            "d712a36aadfb47cabc7aaa90cf9e515773ba3bfc1fe3783730b387ce15c49261",
443        );
444    }
445}