git_internal/internal/object/
tree.rs

1//! In Git, a tree object is used to represent the state of a directory at a specific point in time.
2//! It stores information about the files and directories within that directory, including their names,
3//! permissions, and the IDs of the objects that represent their contents.
4//!
5//! A tree object can contain other tree objects as well as blob objects, which represent the contents
6//! of individual files. The object IDs of these child objects are stored within the tree object itself.
7//!
8//! When you make a commit in Git, you create a new tree object that represents the state of the
9//! repository at that point in time. The parent of the new commit is typically the tree object
10//! representing the previous state of the repository.
11//!
12//! Git uses the tree object to efficiently store and manage the contents of a repository. By
13//! representing the contents of a directory as a tree object, Git can quickly determine which files
14//! have been added, modified, or deleted between two points in time. This allows Git to perform
15//! operations like merging and rebasing more quickly and accurately.
16//!
17use crate::errors::GitError;
18use crate::hash::SHA1;
19use crate::internal::object::ObjectTrait;
20use crate::internal::object::ObjectType;
21use bincode::{Decode, Encode};
22use colored::Colorize;
23use encoding_rs::GBK;
24use serde::Deserialize;
25use serde::Serialize;
26use std::fmt::Display;
27
28/// In Git, the mode field in a tree object's entry specifies the type of the object represented by
29/// that entry. The mode is a three-digit octal number that encodes both the permissions and the
30/// type of the object. The first digit specifies the object type, and the remaining two digits
31/// specify the file mode or permissions.
32#[derive(PartialEq, Eq, Debug, Clone, Copy, Serialize, Deserialize, Hash, Encode, Decode)]
33pub enum TreeItemMode {
34    Blob,
35    BlobExecutable,
36    Tree,
37    Commit,
38    Link,
39}
40
41impl Display for TreeItemMode {
42    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
43        let _print = match *self {
44            TreeItemMode::Blob => "blob",
45            TreeItemMode::BlobExecutable => "blob executable",
46            TreeItemMode::Tree => "tree",
47            TreeItemMode::Commit => "commit",
48            TreeItemMode::Link => "link",
49        };
50
51        write!(f, "{}", String::from(_print).blue())
52    }
53}
54
55impl TreeItemMode {
56    /// Convert a 32-bit mode to a TreeItemType
57    ///
58    /// |0100000000000000| (040000)| Directory|
59    /// |1000000110100100| (100644)| Regular non-executable file|
60    /// |1000000110110100| (100664)| Regular non-executable group-writeable file|
61    /// |1000000111101101| (100755)| Regular executable file|
62    /// |1010000000000000| (120000)| Symbolic link|
63    /// |1110000000000000| (160000)| Gitlink|
64    /// ---
65    /// # GitLink
66    /// Gitlink, also known as a submodule, is a feature in Git that allows you to include a Git
67    /// repository as a subdirectory within another Git repository. This is useful when you want to
68    /// incorporate code from another project into your own project, without having to manually copy
69    /// the code into your repository.
70    ///
71    /// When you add a submodule to your Git repository, Git stores a reference to the other
72    /// repository at a specific commit. This means that your repository will always point to a
73    /// specific version of the other repository, even if changes are made to the submodule's code
74    /// in the future.
75    ///
76    /// To work with a submodule in Git, you use commands like git submodule add, git submodule
77    /// update, and git submodule init. These commands allow you to add a submodule to your repository,
78    /// update it to the latest version, and initialize it for use.
79    ///
80    /// Submodules can be a powerful tool for managing dependencies between different projects and
81    /// components. However, they can also add complexity to your workflow, so it's important to
82    /// understand how they work and when to use them.
83    pub fn tree_item_type_from_bytes(mode: &[u8]) -> Result<TreeItemMode, GitError> {
84        Ok(match mode {
85            b"40000" => TreeItemMode::Tree,
86            b"100644" => TreeItemMode::Blob,
87            b"100755" => TreeItemMode::BlobExecutable,
88            b"120000" => TreeItemMode::Link,
89            b"160000" => TreeItemMode::Commit,
90            b"100664" => TreeItemMode::Blob,
91            b"100640" => TreeItemMode::Blob,
92            _ => {
93                return Err(GitError::InvalidTreeItem(
94                    String::from_utf8(mode.to_vec()).unwrap(),
95                ));
96            }
97        })
98    }
99
100    /// 32-bit mode, split into (high to low bits):
101    /// - 4-bit object type: valid values in binary are 1000 (regular file), 1010 (symbolic link) and 1110 (gitlink)
102    /// - 3-bit unused
103    /// - 9-bit unix permission: Only 0755 and 0644 are valid for regular files. Symbolic links and gitlink have value 0 in this field.
104    pub fn to_bytes(self) -> &'static [u8] {
105        match self {
106            TreeItemMode::Blob => b"100644",
107            TreeItemMode::BlobExecutable => b"100755",
108            TreeItemMode::Link => b"120000",
109            TreeItemMode::Tree => b"40000",
110            TreeItemMode::Commit => b"160000",
111        }
112    }
113}
114
115/// A tree object contains a list of entries, one for each file or directory in the tree. Each entry
116/// in the file represents an entry in the tree, and each entry has the following format:
117///
118/// ```bash
119/// <mode> <name>\0<binary object ID>
120/// ```
121/// - `<mode>` is the mode of the object, represented as a six-digit octal number. The first digit
122///   represents the object type (tree, blob, etc.), and the remaining digits represent the file mode or permissions.
123/// - `<name>` is the name of the object.
124/// - `\0` is a null byte separator.
125/// - `<binary object ID>` is the ID of the object that represents the contents of the file or
126///   directory, represented as a binary SHA-1 hash.
127///
128/// # Example
129/// ```bash
130/// 100644 hello-world\0<blob object ID>
131/// 040000 data\0<tree object ID>
132/// ```
133#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Hash, Encode, Decode)]
134pub struct TreeItem {
135    pub mode: TreeItemMode,
136    pub id: SHA1,
137    pub name: String,
138}
139
140impl Display for TreeItem {
141    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
142        write!(
143            f,
144            "{} {} {}",
145            self.mode,
146            self.name,
147            self.id.to_string().blue()
148        )
149    }
150}
151
152impl TreeItem {
153    // Create a new TreeItem from a mode, id and name
154    pub fn new(mode: TreeItemMode, id: SHA1, name: String) -> Self {
155        TreeItem { mode, id, name }
156    }
157
158    /// Create a new TreeItem from a byte vector, split into a mode, id and name, the TreeItem format is:
159    ///
160    /// ```bash
161    /// <mode> <name>\0<binary object ID>
162    /// ```
163    ///
164    pub fn from_bytes(bytes: &[u8]) -> Result<Self, GitError> {
165        let mut parts = bytes.splitn(2, |b| *b == b' ');
166        let mode = parts.next().unwrap();
167        let rest = parts.next().unwrap();
168        let mut parts = rest.splitn(2, |b| *b == b'\0');
169        let raw_name = parts.next().unwrap();
170        let id = parts.next().unwrap();
171
172        let name = if String::from_utf8(raw_name.to_vec()).is_ok() {
173            String::from_utf8(raw_name.to_vec()).unwrap()
174        } else {
175            let (decoded, _, had_errors) = GBK.decode(raw_name);
176            if had_errors {
177                return Err(GitError::InvalidTreeItem(format!(
178                    "Unsupported raw format: {raw_name:?}"
179                )));
180            } else {
181                decoded.to_string()
182            }
183        };
184        Ok(TreeItem {
185            mode: TreeItemMode::tree_item_type_from_bytes(mode)?,
186            id: SHA1::from_bytes(id),
187            name,
188        })
189    }
190
191    /// Convert a TreeItem to a byte vector
192    /// ```rust
193    /// use std::str::FromStr;
194    /// use git_internal::internal::object::tree::{TreeItem, TreeItemMode};
195    /// use git_internal::hash::SHA1;
196    ///
197    /// let tree_item = TreeItem::new(
198    ///     TreeItemMode::Blob,
199    ///     SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
200    ///     "hello-world".to_string(),
201    /// );
202    ///
203    //  let bytes = tree_item.to_bytes();
204    /// ```
205    pub fn to_data(&self) -> Vec<u8> {
206        let mut bytes = Vec::new();
207
208        bytes.extend_from_slice(self.mode.to_bytes());
209        bytes.push(b' ');
210        bytes.extend_from_slice(self.name.as_bytes());
211        bytes.push(b'\0');
212        bytes.extend_from_slice(&self.id.to_data());
213
214        bytes
215    }
216
217    pub fn is_tree(&self) -> bool {
218        self.mode == TreeItemMode::Tree
219    }
220
221    pub fn is_blob(&self) -> bool {
222        self.mode == TreeItemMode::Blob
223    }
224}
225
226/// A tree object is a Git object that represents a directory. It contains a list of entries, one
227/// for each file or directory in the tree.
228#[derive(Eq, Debug, Clone, Serialize, Deserialize, Encode, Decode)]
229pub struct Tree {
230    pub id: SHA1,
231    pub tree_items: Vec<TreeItem>,
232}
233
234impl PartialEq for Tree {
235    fn eq(&self, other: &Self) -> bool {
236        self.id == other.id
237    }
238}
239
240impl Display for Tree {
241    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
242        writeln!(f, "Tree: {}", self.id.to_string().blue())?;
243        for item in &self.tree_items {
244            writeln!(f, "{item}")?;
245        }
246        Ok(())
247    }
248}
249
250impl Tree {
251    pub fn from_tree_items(tree_items: Vec<TreeItem>) -> Result<Self, GitError> {
252        if tree_items.is_empty() {
253            return Err(GitError::EmptyTreeItems(
254                "When export tree object to meta, the items is empty"
255                    .parse()
256                    .unwrap(),
257            ));
258        }
259        let mut data = Vec::new();
260        for item in &tree_items {
261            data.extend_from_slice(item.to_data().as_slice());
262        }
263
264        Ok(Tree {
265            id: SHA1::from_type_and_data(ObjectType::Tree, &data),
266            tree_items,
267        })
268    }
269
270    /// After the subdirectory is changed, the hash value of the tree is recalculated.
271    pub fn rehash(&mut self) {
272        let mut data = Vec::new();
273        for item in &self.tree_items {
274            data.extend_from_slice(item.to_data().as_slice());
275        }
276        self.id = SHA1::from_type_and_data(ObjectType::Tree, &data);
277    }
278}
279
280impl TryFrom<&[u8]> for Tree {
281    type Error = GitError;
282    fn try_from(data: &[u8]) -> Result<Self, Self::Error> {
283        let h = SHA1::from_type_and_data(ObjectType::Tree, data);
284        Tree::from_bytes(data, h)
285    }
286}
287impl ObjectTrait for Tree {
288    fn from_bytes(data: &[u8], hash: SHA1) -> Result<Self, GitError>
289    where
290        Self: Sized,
291    {
292        let mut tree_items = Vec::new();
293        let mut i = 0;
294        while i < data.len() {
295            // Find the position of the null byte (0x00)
296            if let Some(index) = memchr::memchr(0x00, &data[i..]) {
297                // Calculate the next position
298                let next = i + index + 21;
299
300                // Extract the bytes and create a TreeItem
301                let item_data = &data[i..next];
302                let tree_item = TreeItem::from_bytes(item_data)?;
303
304                tree_items.push(tree_item);
305
306                i = next;
307            } else {
308                // If no null byte is found, return an error
309                return Err(GitError::InvalidTreeObject);
310            }
311        }
312
313        Ok(Tree {
314            id: hash,
315            tree_items,
316        })
317    }
318
319    fn get_type(&self) -> ObjectType {
320        ObjectType::Tree
321    }
322
323    fn get_size(&self) -> usize {
324        todo!()
325    }
326
327    fn to_data(&self) -> Result<Vec<u8>, GitError> {
328        let mut data: Vec<u8> = Vec::new();
329
330        for item in &self.tree_items {
331            data.extend_from_slice(item.to_data().as_slice());
332            //data.push(b'\0');
333        }
334
335        Ok(data)
336    }
337}
338
339#[cfg(test)]
340mod tests {
341
342    use std::str::FromStr;
343
344    use crate::hash::SHA1;
345    use crate::internal::object::tree::{Tree, TreeItem, TreeItemMode};
346
347    #[test]
348    fn test_tree_item_new() {
349        let tree_item = TreeItem::new(
350            TreeItemMode::Blob,
351            SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
352            "hello-world".to_string(),
353        );
354
355        assert_eq!(tree_item.mode, TreeItemMode::Blob);
356        assert_eq!(
357            tree_item.id.to_string(),
358            "8ab686eafeb1f44702738c8b0f24f2567c36da6d"
359        );
360    }
361
362    #[test]
363    fn test_tree_item_to_bytes() {
364        let tree_item = TreeItem::new(
365            TreeItemMode::Blob,
366            SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
367            "hello-world".to_string(),
368        );
369
370        let bytes = tree_item.to_data();
371        assert_eq!(
372            bytes,
373            vec![
374                49, 48, 48, 54, 52, 52, 32, 104, 101, 108, 108, 111, 45, 119, 111, 114, 108, 100,
375                0, 138, 182, 134, 234, 254, 177, 244, 71, 2, 115, 140, 139, 15, 36, 242, 86, 124,
376                54, 218, 109
377            ]
378        );
379    }
380
381    #[test]
382    fn test_tree_item_from_bytes() {
383        let item = TreeItem::new(
384            TreeItemMode::Blob,
385            SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
386            "hello-world".to_string(),
387        );
388
389        let bytes = item.to_data();
390        let tree_item = TreeItem::from_bytes(bytes.as_slice()).unwrap();
391
392        assert_eq!(tree_item.mode, TreeItemMode::Blob);
393        assert_eq!(tree_item.id.to_string(), item.id.to_string());
394    }
395
396    #[test]
397    fn test_from_tree_items() {
398        let item = TreeItem::new(
399            TreeItemMode::Blob,
400            SHA1::from_str("17288789afffb273c8c394bc65e87d899b92897b").unwrap(),
401            "hello-world".to_string(),
402        );
403        let tree = Tree::from_tree_items(vec![item]).unwrap();
404        println!("{}", tree.id);
405        assert_eq!(
406            "cf99336fa61439a2f074c7e6de1c1a05579550e2",
407            tree.id.to_string()
408        );
409    }
410}