git_internal/internal/object/
tree.rs

1//! In Git, a tree object is used to represent the state of a directory at a specific point in time.
2//! It stores information about the files and directories within that directory, including their names,
3//! permissions, and the IDs of the objects that represent their contents.
4//!
5//! A tree object can contain other tree objects as well as blob objects, which represent the contents
6//! of individual files. The object IDs of these child objects are stored within the tree object itself.
7//!
8//! When you make a commit in Git, you create a new tree object that represents the state of the
9//! repository at that point in time. The parent of the new commit is typically the tree object
10//! representing the previous state of the repository.
11//!
12//! Git uses the tree object to efficiently store and manage the contents of a repository. By
13//! representing the contents of a directory as a tree object, Git can quickly determine which files
14//! have been added, modified, or deleted between two points in time. This allows Git to perform
15//! operations like merging and rebasing more quickly and accurately.
16//!
17use crate::errors::GitError;
18use crate::hash::{ObjectHash, get_hash_kind};
19use crate::internal::object::ObjectTrait;
20use crate::internal::object::ObjectType;
21use bincode::{Decode, Encode};
22use colored::Colorize;
23use encoding_rs::GBK;
24use serde::Deserialize;
25use serde::Serialize;
26use std::fmt::Display;
27
28/// In Git, the mode field in a tree object's entry specifies the type of the object represented by
29/// that entry. The mode is a three-digit octal number that encodes both the permissions and the
30/// type of the object. The first digit specifies the object type, and the remaining two digits
31/// specify the file mode or permissions.
32#[derive(PartialEq, Eq, Debug, Clone, Copy, Serialize, Deserialize, Hash, Encode, Decode)]
33pub enum TreeItemMode {
34    Blob,
35    BlobExecutable,
36    Tree,
37    Commit,
38    Link,
39}
40
41impl Display for TreeItemMode {
42    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
43        let _print = match *self {
44            TreeItemMode::Blob => "blob",
45            TreeItemMode::BlobExecutable => "blob executable",
46            TreeItemMode::Tree => "tree",
47            TreeItemMode::Commit => "commit",
48            TreeItemMode::Link => "link",
49        };
50
51        write!(f, "{}", String::from(_print).blue())
52    }
53}
54
55impl TreeItemMode {
56    /// Convert a 32-bit mode to a TreeItemType
57    ///
58    /// |0100000000000000| (040000)| Directory|
59    /// |1000000110100100| (100644)| Regular non-executable file|
60    /// |1000000110110100| (100664)| Regular non-executable group-writeable file|
61    /// |1000000111101101| (100755)| Regular executable file|
62    /// |1010000000000000| (120000)| Symbolic link|
63    /// |1110000000000000| (160000)| Gitlink|
64    /// ---
65    /// # GitLink
66    /// Gitlink, also known as a submodule, is a feature in Git that allows you to include a Git
67    /// repository as a subdirectory within another Git repository. This is useful when you want to
68    /// incorporate code from another project into your own project, without having to manually copy
69    /// the code into your repository.
70    ///
71    /// When you add a submodule to your Git repository, Git stores a reference to the other
72    /// repository at a specific commit. This means that your repository will always point to a
73    /// specific version of the other repository, even if changes are made to the submodule's code
74    /// in the future.
75    ///
76    /// To work with a submodule in Git, you use commands like git submodule add, git submodule
77    /// update, and git submodule init. These commands allow you to add a submodule to your repository,
78    /// update it to the latest version, and initialize it for use.
79    ///
80    /// Submodules can be a powerful tool for managing dependencies between different projects and
81    /// components. However, they can also add complexity to your workflow, so it's important to
82    /// understand how they work and when to use them.
83    pub fn tree_item_type_from_bytes(mode: &[u8]) -> Result<TreeItemMode, GitError> {
84        Ok(match mode {
85            b"40000" => TreeItemMode::Tree,
86            b"100644" => TreeItemMode::Blob,
87            b"100755" => TreeItemMode::BlobExecutable,
88            b"120000" => TreeItemMode::Link,
89            b"160000" => TreeItemMode::Commit,
90            b"100664" => TreeItemMode::Blob,
91            b"100640" => TreeItemMode::Blob,
92            _ => {
93                return Err(GitError::InvalidTreeItem(
94                    String::from_utf8(mode.to_vec()).unwrap(),
95                ));
96            }
97        })
98    }
99
100    /// 32-bit mode, split into (high to low bits):
101    /// - 4-bit object type: valid values in binary are 1000 (regular file), 1010 (symbolic link) and 1110 (gitlink)
102    /// - 3-bit unused
103    /// - 9-bit unix permission: Only 0755 and 0644 are valid for regular files. Symbolic links and gitlink have value 0 in this field.
104    pub fn to_bytes(self) -> &'static [u8] {
105        match self {
106            TreeItemMode::Blob => b"100644",
107            TreeItemMode::BlobExecutable => b"100755",
108            TreeItemMode::Link => b"120000",
109            TreeItemMode::Tree => b"40000",
110            TreeItemMode::Commit => b"160000",
111        }
112    }
113}
114
115/// A tree object contains a list of entries, one for each file or directory in the tree. Each entry
116/// in the file represents an entry in the tree, and each entry has the following format:
117///
118/// ```bash
119/// <mode> <name>\0<binary object ID>
120/// ```
121/// - `<mode>` is the mode of the object, represented as a six-digit octal number. The first digit
122///   represents the object type (tree, blob, etc.), and the remaining digits represent the file mode or permissions.
123/// - `<name>` is the name of the object.
124/// - `\0` is a null byte separator.
125/// - `<binary object ID>` is the ID of the object that represents the contents of the file or
126///   directory, represented as a binary SHA-1 hash.
127///
128/// # Example
129/// ```bash
130/// 100644 hello-world\0<blob object ID>
131/// 040000 data\0<tree object ID>
132/// ```
133#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Hash, Encode, Decode)]
134pub struct TreeItem {
135    pub mode: TreeItemMode,
136    pub id: ObjectHash,
137    pub name: String,
138}
139
140impl Display for TreeItem {
141    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
142        write!(
143            f,
144            "{} {} {}",
145            self.mode,
146            self.name,
147            self.id.to_string().blue()
148        )
149    }
150}
151
152impl TreeItem {
153    // Create a new TreeItem from a mode, id and name
154    pub fn new(mode: TreeItemMode, id: ObjectHash, name: String) -> Self {
155        TreeItem { mode, id, name }
156    }
157
158    /// Create a new TreeItem from a byte vector, split into a mode, id and name, the TreeItem format is:
159    ///
160    /// ```bash
161    /// <mode> <name>\0<binary object ID>
162    /// ```
163    ///
164    pub fn from_bytes(bytes: &[u8]) -> Result<Self, GitError> {
165        let mut parts = bytes.splitn(2, |b| *b == b' ');
166        let mode = parts.next().unwrap();
167        let rest = parts.next().unwrap();
168        let mut parts = rest.splitn(2, |b| *b == b'\0');
169        let raw_name = parts.next().unwrap();
170        let id = parts.next().unwrap();
171
172        let name = if String::from_utf8(raw_name.to_vec()).is_ok() {
173            String::from_utf8(raw_name.to_vec()).unwrap()
174        } else {
175            let (decoded, _, had_errors) = GBK.decode(raw_name);
176            if had_errors {
177                return Err(GitError::InvalidTreeItem(format!(
178                    "Unsupported raw format: {raw_name:?}"
179                )));
180            } else {
181                decoded.to_string()
182            }
183        };
184        Ok(TreeItem {
185            mode: TreeItemMode::tree_item_type_from_bytes(mode)?,
186            id: ObjectHash::from_bytes(id).unwrap(),
187            name,
188        })
189    }
190
191    /// Convert a TreeItem to a byte vector
192    /// ```rust
193    /// use std::str::FromStr;
194    /// use git_internal::internal::object::tree::{TreeItem, TreeItemMode};
195    /// use git_internal::hash::SHA1;
196    ///
197    /// let tree_item = TreeItem::new(
198    ///     TreeItemMode::Blob,
199    ///     SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
200    ///     "hello-world".to_string(),
201    /// );
202    ///
203    //  let bytes = tree_item.to_bytes();
204    /// ```
205    pub fn to_data(&self) -> Vec<u8> {
206        let mut bytes = Vec::new();
207
208        bytes.extend_from_slice(self.mode.to_bytes());
209        bytes.push(b' ');
210        bytes.extend_from_slice(self.name.as_bytes());
211        bytes.push(b'\0');
212        bytes.extend_from_slice(&self.id.to_data());
213
214        bytes
215    }
216
217    pub fn is_tree(&self) -> bool {
218        self.mode == TreeItemMode::Tree
219    }
220
221    pub fn is_blob(&self) -> bool {
222        self.mode == TreeItemMode::Blob
223    }
224}
225
226/// A tree object is a Git object that represents a directory. It contains a list of entries, one
227/// for each file or directory in the tree.
228#[derive(Eq, Debug, Clone, Serialize, Deserialize, Encode, Decode)]
229pub struct Tree {
230    pub id: ObjectHash,
231    pub tree_items: Vec<TreeItem>,
232}
233
234impl PartialEq for Tree {
235    fn eq(&self, other: &Self) -> bool {
236        self.id == other.id
237    }
238}
239
240impl Display for Tree {
241    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
242        writeln!(f, "Tree: {}", self.id.to_string().blue())?;
243        for item in &self.tree_items {
244            writeln!(f, "{item}")?;
245        }
246        Ok(())
247    }
248}
249
250impl Tree {
251    pub fn from_tree_items(tree_items: Vec<TreeItem>) -> Result<Self, GitError> {
252        if tree_items.is_empty() {
253            return Err(GitError::EmptyTreeItems(
254                "When export tree object to meta, the items is empty"
255                    .parse()
256                    .unwrap(),
257            ));
258        }
259        let mut data = Vec::new();
260        for item in &tree_items {
261            data.extend_from_slice(item.to_data().as_slice());
262        }
263
264        Ok(Tree {
265            id: ObjectHash::from_type_and_data(ObjectType::Tree, &data),
266            tree_items,
267        })
268    }
269
270    /// After the subdirectory is changed, the hash value of the tree is recalculated.
271    pub fn rehash(&mut self) {
272        let mut data = Vec::new();
273        for item in &self.tree_items {
274            data.extend_from_slice(item.to_data().as_slice());
275        }
276        self.id = ObjectHash::from_type_and_data(ObjectType::Tree, &data);
277    }
278}
279
280impl TryFrom<&[u8]> for Tree {
281    type Error = GitError;
282    fn try_from(data: &[u8]) -> Result<Self, Self::Error> {
283        let h = ObjectHash::from_type_and_data(ObjectType::Tree, data);
284        Tree::from_bytes(data, h)
285    }
286}
287impl ObjectTrait for Tree {
288    fn from_bytes(data: &[u8], hash: ObjectHash) -> Result<Self, GitError>
289    where
290        Self: Sized,
291    {
292        let mut tree_items = Vec::new();
293        let mut i = 0;
294        while i < data.len() {
295            // Find the position of the null byte (0x00)
296            if let Some(index) = memchr::memchr(0x00, &data[i..]) {
297                // Calculate the next position
298                let next = i + index + get_hash_kind().size() + 1; // +1 for the null byte
299                if next > data.len() {
300                    return Err(GitError::InvalidTreeObject);
301                } //check bounds TreeItem::from_bytes will panic if out of bounds
302                // Extract the bytes and create a TreeItem
303                let item_data = &data[i..next];
304                let tree_item = TreeItem::from_bytes(item_data)?;
305
306                tree_items.push(tree_item);
307
308                i = next;
309            } else {
310                // If no null byte is found, return an error
311                return Err(GitError::InvalidTreeObject);
312            }
313        }
314
315        Ok(Tree {
316            id: hash,
317            tree_items,
318        })
319    }
320
321    fn get_type(&self) -> ObjectType {
322        ObjectType::Tree
323    }
324
325    fn get_size(&self) -> usize {
326        todo!()
327    }
328
329    fn to_data(&self) -> Result<Vec<u8>, GitError> {
330        let mut data: Vec<u8> = Vec::new();
331
332        for item in &self.tree_items {
333            data.extend_from_slice(item.to_data().as_slice());
334            //data.push(b'\0');
335        }
336
337        Ok(data)
338    }
339}
340
341#[cfg(test)]
342mod tests {
343
344    use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test};
345    use crate::internal::object::tree::{Tree, TreeItem, TreeItemMode};
346    use std::str::FromStr;
347
348    #[test]
349    fn test_tree_item_new() {
350        let _guard = set_hash_kind_for_test(HashKind::Sha1);
351        let tree_item = TreeItem::new(
352            TreeItemMode::Blob,
353            ObjectHash::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
354            "hello-world".to_string(),
355        );
356
357        assert_eq!(tree_item.mode, TreeItemMode::Blob);
358        assert_eq!(
359            tree_item.id.to_string(),
360            "8ab686eafeb1f44702738c8b0f24f2567c36da6d"
361        );
362    }
363    #[test]
364    fn test_tree_item_new_sha256() {
365        let _guard = set_hash_kind_for_test(HashKind::Sha256);
366        let tree_item = TreeItem::new(
367            TreeItemMode::Blob,
368            ObjectHash::from_str(
369                "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
370            )
371            .unwrap(),
372            "hello-world".to_string(),
373        );
374
375        assert_eq!(tree_item.mode, TreeItemMode::Blob);
376        assert_eq!(
377            tree_item.id.to_string(),
378            "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4"
379        );
380        assert_eq!(tree_item.name, "hello-world");
381    }
382
383    #[test]
384    fn test_tree_item_to_bytes() {
385        let _guard = set_hash_kind_for_test(HashKind::Sha1);
386        let tree_item = TreeItem::new(
387            TreeItemMode::Blob,
388            ObjectHash::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
389            "hello-world".to_string(),
390        );
391
392        let bytes = tree_item.to_data();
393        assert_eq!(
394            bytes,
395            vec![
396                49, 48, 48, 54, 52, 52, 32, 104, 101, 108, 108, 111, 45, 119, 111, 114, 108, 100,
397                0, 138, 182, 134, 234, 254, 177, 244, 71, 2, 115, 140, 139, 15, 36, 242, 86, 124,
398                54, 218, 109
399            ]
400        );
401    }
402    #[test]
403    fn test_tree_item_to_bytes_sha256() {
404        let _guard = set_hash_kind_for_test(HashKind::Sha256);
405        let tree_item = TreeItem::new(
406            TreeItemMode::Blob,
407            ObjectHash::from_str(
408                "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
409            )
410            .unwrap(),
411            "hello-world".to_string(),
412        );
413
414        let bytes = tree_item.to_data();
415
416        // a small helper: convert hex string to byte sequence
417        fn hex_to_bytes(s: &str) -> Vec<u8> {
418            assert!(s.len() % 2 == 0);
419            let mut out = Vec::with_capacity(s.len() / 2);
420            for i in (0..s.len()).step_by(2) {
421                let byte = u8::from_str_radix(&s[i..i + 2], 16).unwrap();
422                out.push(byte);
423            }
424            out
425        }
426
427        // expected encoding: `"100644 hello-world\0" + <32-byte blob hash>`
428        let mut expected = b"100644 hello-world\0".to_vec();
429        expected.extend_from_slice(&hex_to_bytes(
430            "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
431        ));
432
433        assert_eq!(bytes, expected);
434    }
435
436    #[test]
437    fn test_tree_item_from_bytes() {
438        let _guard = set_hash_kind_for_test(HashKind::Sha1);
439        let item = TreeItem::new(
440            TreeItemMode::Blob,
441            ObjectHash::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
442            "hello-world".to_string(),
443        );
444
445        let bytes = item.to_data();
446        let tree_item = TreeItem::from_bytes(bytes.as_slice()).unwrap();
447
448        assert_eq!(tree_item.mode, TreeItemMode::Blob);
449        assert_eq!(tree_item.id.to_string(), item.id.to_string());
450    }
451    #[test]
452    fn test_tree_item_from_bytes_sha256() {
453        let _guard = set_hash_kind_for_test(HashKind::Sha256);
454        let item = TreeItem::new(
455            TreeItemMode::Blob,
456            ObjectHash::from_str(
457                "1111111111111111111111111111111111111111111111111111111111111111",
458            )
459            .unwrap(),
460            "hello-world".to_string(),
461        );
462        let bytes = item.to_data();
463        let tree_item = TreeItem::from_bytes(bytes.as_slice()).unwrap();
464
465        assert_eq!(tree_item.mode, TreeItemMode::Blob);
466        assert_eq!(tree_item.id.to_string(), item.id.to_string());
467    }
468
469    #[test]
470    fn test_from_tree_items() {
471        let _guard = set_hash_kind_for_test(HashKind::Sha1);
472        let item = TreeItem::new(
473            TreeItemMode::Blob,
474            ObjectHash::from_str("17288789afffb273c8c394bc65e87d899b92897b").unwrap(),
475            "hello-world".to_string(),
476        );
477        let tree = Tree::from_tree_items(vec![item]).unwrap();
478        println!("{}", tree.id);
479        assert_eq!(
480            "cf99336fa61439a2f074c7e6de1c1a05579550e2",
481            tree.id.to_string()
482        );
483    }
484    #[test]
485    fn test_from_tree_items_sha256() {
486        let _guard = set_hash_kind_for_test(HashKind::Sha256);
487        let items = vec![
488            TreeItem::new(
489                TreeItemMode::Blob,
490                ObjectHash::from_str(
491                    "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
492                )
493                .unwrap(),
494                "a.txt".to_string(),
495            ),
496            TreeItem::new(
497                TreeItemMode::Blob,
498                ObjectHash::from_str(
499                    "fc2593998f8e1dec9c3a8be11557888134dad90ef5c7a2d6236ed75534c7698e",
500                )
501                .unwrap(),
502                "b.txt".to_string(),
503            ),
504            TreeItem::new(
505                TreeItemMode::Blob,
506                ObjectHash::from_str(
507                    "21513dcb4d6f9eb247db3b4c52158395d94f809cbaa2630bd2a7a474d9b39fab",
508                )
509                .unwrap(),
510                "c.txt".to_string(),
511            ),
512            TreeItem::new(
513                TreeItemMode::Blob,
514                ObjectHash::from_str(
515                    "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
516                )
517                .unwrap(),
518                "hello-world".to_string(),
519            ),
520            TreeItem::new(
521                TreeItemMode::Blob,
522                ObjectHash::from_str(
523                    "9ba9ae56288652bf32f074f922e37d3e95df8920b3cdfc053309595b8f86cbc6",
524                )
525                .unwrap(),
526                "message.txt".to_string(),
527            ),
528        ];
529        let tree = Tree::from_tree_items(items).unwrap();
530        println!("{}", tree.id);
531        assert_eq!(
532            "d712a36aadfb47cabc7aaa90cf9e515773ba3bfc1fe3783730b387ce15c49261",
533            tree.id.to_string()
534        );
535    }
536}