git_internal/internal/object/tree.rs
1//! In Git, a tree object is used to represent the state of a directory at a specific point in time.
2//! It stores information about the files and directories within that directory, including their names,
3//! permissions, and the IDs of the objects that represent their contents.
4//!
5//! A tree object can contain other tree objects as well as blob objects, which represent the contents
6//! of individual files. The object IDs of these child objects are stored within the tree object itself.
7//!
8//! When you make a commit in Git, you create a new tree object that represents the state of the
9//! repository at that point in time. The parent of the new commit is typically the tree object
10//! representing the previous state of the repository.
11//!
12//! Git uses the tree object to efficiently store and manage the contents of a repository. By
13//! representing the contents of a directory as a tree object, Git can quickly determine which files
14//! have been added, modified, or deleted between two points in time. This allows Git to perform
15//! operations like merging and rebasing more quickly and accurately.
16//!
17use std::fmt::Display;
18
19use bincode::{Decode, Encode};
20use colored::Colorize;
21use encoding_rs::GBK;
22use serde::{Deserialize, Serialize};
23
24use crate::{
25 errors::GitError,
26 hash::{ObjectHash, get_hash_kind},
27 internal::object::{ObjectTrait, ObjectType},
28};
29
30/// In Git, the mode field in a tree object's entry specifies the type of the object represented by
31/// that entry. The mode is a three-digit octal number that encodes both the permissions and the
32/// type of the object. The first digit specifies the object type, and the remaining two digits
33/// specify the file mode or permissions.
34#[derive(PartialEq, Eq, Debug, Clone, Copy, Serialize, Deserialize, Hash, Encode, Decode)]
35pub enum TreeItemMode {
36 Blob,
37 BlobExecutable,
38 Tree,
39 Commit,
40 Link,
41}
42
43impl Display for TreeItemMode {
44 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
45 let _print = match *self {
46 TreeItemMode::Blob => "blob",
47 TreeItemMode::BlobExecutable => "blob executable",
48 TreeItemMode::Tree => "tree",
49 TreeItemMode::Commit => "commit",
50 TreeItemMode::Link => "link",
51 };
52
53 write!(f, "{}", String::from(_print).blue())
54 }
55}
56
57impl TreeItemMode {
58 /// Convert a 32-bit mode to a TreeItemType
59 ///
60 /// |0100000000000000| (040000)| Directory|
61 /// |1000000110100100| (100644)| Regular non-executable file|
62 /// |1000000110110100| (100664)| Regular non-executable group-writeable file|
63 /// |1000000111101101| (100755)| Regular executable file|
64 /// |1010000000000000| (120000)| Symbolic link|
65 /// |1110000000000000| (160000)| Gitlink|
66 /// ---
67 /// # GitLink
68 /// Gitlink, also known as a submodule, is a feature in Git that allows you to include a Git
69 /// repository as a subdirectory within another Git repository. This is useful when you want to
70 /// incorporate code from another project into your own project, without having to manually copy
71 /// the code into your repository.
72 ///
73 /// When you add a submodule to your Git repository, Git stores a reference to the other
74 /// repository at a specific commit. This means that your repository will always point to a
75 /// specific version of the other repository, even if changes are made to the submodule's code
76 /// in the future.
77 ///
78 /// To work with a submodule in Git, you use commands like git submodule add, git submodule
79 /// update, and git submodule init. These commands allow you to add a submodule to your repository,
80 /// update it to the latest version, and initialize it for use.
81 ///
82 /// Submodules can be a powerful tool for managing dependencies between different projects and
83 /// components. However, they can also add complexity to your workflow, so it's important to
84 /// understand how they work and when to use them.
85 pub fn tree_item_type_from_bytes(mode: &[u8]) -> Result<TreeItemMode, GitError> {
86 Ok(match mode {
87 b"40000" => TreeItemMode::Tree,
88 b"100644" => TreeItemMode::Blob,
89 b"100755" => TreeItemMode::BlobExecutable,
90 b"120000" => TreeItemMode::Link,
91 b"160000" => TreeItemMode::Commit,
92 b"100664" => TreeItemMode::Blob,
93 b"100640" => TreeItemMode::Blob,
94 _ => {
95 return Err(GitError::InvalidTreeItem(
96 String::from_utf8(mode.to_vec()).unwrap(),
97 ));
98 }
99 })
100 }
101
102 /// 32-bit mode, split into (high to low bits):
103 /// - 4-bit object type: valid values in binary are 1000 (regular file), 1010 (symbolic link) and 1110 (gitlink)
104 /// - 3-bit unused
105 /// - 9-bit unix permission: Only 0755 and 0644 are valid for regular files. Symbolic links and gitlink have value 0 in this field.
106 pub fn to_bytes(self) -> &'static [u8] {
107 match self {
108 TreeItemMode::Blob => b"100644",
109 TreeItemMode::BlobExecutable => b"100755",
110 TreeItemMode::Link => b"120000",
111 TreeItemMode::Tree => b"40000",
112 TreeItemMode::Commit => b"160000",
113 }
114 }
115}
116
117/// A tree object contains a list of entries, one for each file or directory in the tree. Each entry
118/// in the file represents an entry in the tree, and each entry has the following format:
119///
120/// ```bash
121/// <mode> <name>\0<binary object ID>
122/// ```
123/// - `<mode>` is the mode of the object, represented as a six-digit octal number. The first digit
124/// represents the object type (tree, blob, etc.), and the remaining digits represent the file mode or permissions.
125/// - `<name>` is the name of the object.
126/// - `\0` is a null byte separator.
127/// - `<binary object ID>` is the ID of the object that represents the contents of the file or
128/// directory, represented as a binary SHA-1 hash.
129///
130/// # Example
131/// ```bash
132/// 100644 hello-world\0<blob object ID>
133/// 040000 data\0<tree object ID>
134/// ```
135#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Hash, Encode, Decode)]
136pub struct TreeItem {
137 pub mode: TreeItemMode,
138 pub id: ObjectHash,
139 pub name: String,
140}
141
142impl Display for TreeItem {
143 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
144 write!(
145 f,
146 "{} {} {}",
147 self.mode,
148 self.name,
149 self.id.to_string().blue()
150 )
151 }
152}
153
154impl TreeItem {
155 /// Creates a new [`TreeItem`] with the given mode, object ID, and name.
156 pub fn new(mode: TreeItemMode, id: ObjectHash, name: String) -> Self {
157 TreeItem { mode, id, name }
158 }
159
160 /// Create a new TreeItem from a byte vector, split into a mode, id and name, the TreeItem format is:
161 ///
162 /// ```bash
163 /// <mode> <name>\0<binary object ID>
164 /// ```
165 ///
166 pub fn from_bytes(bytes: &[u8]) -> Result<Self, GitError> {
167 let mut parts = bytes.splitn(2, |b| *b == b' ');
168 let mode = parts.next().unwrap();
169 let rest = parts.next().unwrap();
170 let mut parts = rest.splitn(2, |b| *b == b'\0');
171 let raw_name = parts.next().unwrap();
172 let id = parts.next().unwrap();
173
174 let name = if String::from_utf8(raw_name.to_vec()).is_ok() {
175 String::from_utf8(raw_name.to_vec()).unwrap()
176 } else {
177 let (decoded, _, had_errors) = GBK.decode(raw_name);
178 if had_errors {
179 return Err(GitError::InvalidTreeItem(format!(
180 "Unsupported raw format: {raw_name:?}"
181 )));
182 } else {
183 decoded.to_string()
184 }
185 };
186 Ok(TreeItem {
187 mode: TreeItemMode::tree_item_type_from_bytes(mode)?,
188 id: ObjectHash::from_bytes(id).unwrap(),
189 name,
190 })
191 }
192
193 /// Convert a TreeItem to a byte vector
194 /// ```rust
195 /// use std::str::FromStr;
196 /// use git_internal::internal::object::tree::{TreeItem, TreeItemMode};
197 /// use git_internal::hash::ObjectHash;
198 ///
199 /// let tree_item = TreeItem::new(
200 /// TreeItemMode::Blob,
201 /// ObjectHash::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(),
202 /// "hello-world".to_string(),
203 /// );
204 ///
205 // let bytes = tree_item.to_bytes();
206 /// ```
207 pub fn to_data(&self) -> Vec<u8> {
208 let mut bytes = Vec::new();
209
210 bytes.extend_from_slice(self.mode.to_bytes());
211 bytes.push(b' ');
212 bytes.extend_from_slice(self.name.as_bytes());
213 bytes.push(b'\0');
214 bytes.extend_from_slice(&self.id.to_data());
215
216 bytes
217 }
218
219 /// Returns `true` if this item represents a subdirectory (tree).
220 pub fn is_tree(&self) -> bool {
221 self.mode == TreeItemMode::Tree
222 }
223
224 /// Returns `true` if this item represents a regular file (blob).
225 pub fn is_blob(&self) -> bool {
226 self.mode == TreeItemMode::Blob
227 }
228}
229
230/// A tree object is a Git object that represents a directory. It contains a list of entries, one
231/// for each file or directory in the tree.
232#[derive(Eq, Debug, Clone, Serialize, Deserialize, Encode, Decode)]
233pub struct Tree {
234 pub id: ObjectHash,
235 pub tree_items: Vec<TreeItem>,
236}
237
238impl PartialEq for Tree {
239 fn eq(&self, other: &Self) -> bool {
240 self.id == other.id
241 }
242}
243
244impl Display for Tree {
245 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
246 writeln!(f, "Tree: {}", self.id.to_string().blue())?;
247 for item in &self.tree_items {
248 writeln!(f, "{item}")?;
249 }
250 Ok(())
251 }
252}
253
254impl Tree {
255 /// Create a new Tree from a list of TreeItems
256 pub fn from_tree_items(tree_items: Vec<TreeItem>) -> Result<Self, GitError> {
257 if tree_items.is_empty() {
258 return Err(GitError::EmptyTreeItems(
259 "When export tree object to meta, the items is empty"
260 .parse()
261 .unwrap(),
262 ));
263 }
264 let mut data = Vec::new();
265 for item in &tree_items {
266 data.extend_from_slice(item.to_data().as_slice());
267 }
268
269 Ok(Tree {
270 id: ObjectHash::from_type_and_data(ObjectType::Tree, &data),
271 tree_items,
272 })
273 }
274
275 /// After the subdirectory is changed, the hash value of the tree is recalculated.
276 pub fn rehash(&mut self) {
277 let mut data = Vec::new();
278 for item in &self.tree_items {
279 data.extend_from_slice(item.to_data().as_slice());
280 }
281 self.id = ObjectHash::from_type_and_data(ObjectType::Tree, &data);
282 }
283}
284
285impl TryFrom<&[u8]> for Tree {
286 type Error = GitError;
287 fn try_from(data: &[u8]) -> Result<Self, Self::Error> {
288 let h = ObjectHash::from_type_and_data(ObjectType::Tree, data);
289 Tree::from_bytes(data, h)
290 }
291}
292impl ObjectTrait for Tree {
293 fn from_bytes(data: &[u8], hash: ObjectHash) -> Result<Self, GitError>
294 where
295 Self: Sized,
296 {
297 let mut tree_items = Vec::new();
298 let mut i = 0;
299 while i < data.len() {
300 // Find the position of the null byte (0x00)
301 if let Some(index) = memchr::memchr(0x00, &data[i..]) {
302 // Calculate the next position
303 let next = i + index + get_hash_kind().size() + 1; // +1 for the null byte
304 if next > data.len() {
305 return Err(GitError::InvalidTreeObject);
306 } //check bounds TreeItem::from_bytes will panic if out of bounds
307 // Extract the bytes and create a TreeItem
308 let item_data = &data[i..next];
309 let tree_item = TreeItem::from_bytes(item_data)?;
310
311 tree_items.push(tree_item);
312
313 i = next;
314 } else {
315 // If no null byte is found, return an error
316 return Err(GitError::InvalidTreeObject);
317 }
318 }
319
320 Ok(Tree {
321 id: hash,
322 tree_items,
323 })
324 }
325
326 fn get_type(&self) -> ObjectType {
327 ObjectType::Tree
328 }
329
330 fn get_size(&self) -> usize {
331 self.to_data().map(|data| data.len()).unwrap_or(0)
332 }
333
334 fn to_data(&self) -> Result<Vec<u8>, GitError> {
335 let mut data: Vec<u8> = Vec::new();
336
337 for item in &self.tree_items {
338 data.extend_from_slice(item.to_data().as_slice());
339 //data.push(b'\0');
340 }
341
342 Ok(data)
343 }
344}
345
346#[cfg(test)]
347mod tests {
348
349 use std::str::FromStr;
350
351 use crate::{
352 hash::{HashKind, ObjectHash, set_hash_kind_for_test},
353 internal::object::tree::{Tree, TreeItem, TreeItemMode},
354 };
355
356 /// Helper: roundtrip a single TreeItem under a given hash kind.
357 fn tree_item_round_trip(kind: HashKind, id_hex: &str) {
358 let _guard = set_hash_kind_for_test(kind);
359 let item = TreeItem::new(
360 TreeItemMode::Blob,
361 ObjectHash::from_str(id_hex).unwrap(),
362 "hello-world".to_string(),
363 );
364
365 let bytes = item.to_data();
366 let parsed = TreeItem::from_bytes(&bytes).unwrap();
367
368 assert_eq!(parsed.mode, TreeItemMode::Blob);
369 assert_eq!(parsed.id, item.id);
370 assert_eq!(parsed.name, item.name);
371 }
372
373 /// TreeItem new/to_data/from_bytes roundtrip with SHA-1.
374 #[test]
375 fn tree_item_round_trip_sha1() {
376 tree_item_round_trip(HashKind::Sha1, "8ab686eafeb1f44702738c8b0f24f2567c36da6d");
377 }
378
379 /// TreeItem new/to_data/from_bytes roundtrip with SHA-256.
380 #[test]
381 fn tree_item_round_trip_sha256() {
382 tree_item_round_trip(
383 HashKind::Sha256,
384 "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
385 );
386 }
387
388 /// Helper: build a tree from items and assert the resulting ID.
389 fn tree_round_trip(kind: HashKind, items: Vec<(&str, &str)>, expected_id: &str) {
390 let _guard = set_hash_kind_for_test(kind);
391 let tree_items = items
392 .into_iter()
393 .map(|(name, id_hex)| {
394 TreeItem::new(
395 TreeItemMode::Blob,
396 ObjectHash::from_str(id_hex).unwrap(),
397 name.to_string(),
398 )
399 })
400 .collect::<Vec<_>>();
401 let tree = Tree::from_tree_items(tree_items).unwrap();
402 assert_eq!(tree.id.to_string(), expected_id);
403 }
404
405 /// Tree construction from items (SHA-1).
406 #[test]
407 fn tree_from_items_sha1() {
408 tree_round_trip(
409 HashKind::Sha1,
410 vec![("hello-world", "17288789afffb273c8c394bc65e87d899b92897b")],
411 "cf99336fa61439a2f074c7e6de1c1a05579550e2",
412 );
413 }
414
415 /// Tree construction from items (SHA-256).
416 #[test]
417 fn tree_from_items_sha256() {
418 tree_round_trip(
419 HashKind::Sha256,
420 vec![
421 (
422 "a.txt",
423 "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
424 ),
425 (
426 "b.txt",
427 "fc2593998f8e1dec9c3a8be11557888134dad90ef5c7a2d6236ed75534c7698e",
428 ),
429 (
430 "c.txt",
431 "21513dcb4d6f9eb247db3b4c52158395d94f809cbaa2630bd2a7a474d9b39fab",
432 ),
433 (
434 "hello-world",
435 "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
436 ),
437 (
438 "message.txt",
439 "9ba9ae56288652bf32f074f922e37d3e95df8920b3cdfc053309595b8f86cbc6",
440 ),
441 ],
442 "d712a36aadfb47cabc7aaa90cf9e515773ba3bfc1fe3783730b387ce15c49261",
443 );
444 }
445}