Skip to main content

git_internal/internal/object/
commit.rs

1//! The Commit object is a data structure used to represent a specific version of a project's
2//! files at a particular point in time. In Git, the commit object is a fundamental data structure
3//! that is used to track changes to a repository's files over time. Whenever a developer makes
4//! changes to the files in a repository, they create a new commit object that records those changes.
5//!
6//! Each commit object in Git contains the following information:
7//!
//! - A unique SHA-1/SHA-256 hash that identifies the commit.
9//! - The author and committer of the commit (which may be different people).
10//! - The date and time the commit was made.
11//! - A commit message that describes the changes made in the commit.
12//! - A reference to the parent commit or commits (in the case of a merge commit) that the new commit is based on.
//! - A reference to the top-level tree object, which captures the contents of the files in the repository at the time the commit was made.
14use std::{fmt::Display, str::FromStr};
15
16use bstr::ByteSlice;
17
18use crate::{
19    errors::GitError,
20    hash::ObjectHash,
21    internal::object::{ObjectTrait, ObjectType, signature::Signature},
22};
23
/// The `Commit` struct is used to represent a commit object.
///
/// - The tree object SHA-1/SHA-256 hash points to the top level tree for this commit, which reflects the complete
///   state of the repository at the time of the commit. The tree object in turn points to blobs and
///   subtrees which represent the files in the repository.
/// - The parent commit SHAs allow Git to construct a linked list of commits and build the full
///   commit history. By chaining together commits in this fashion, Git is able to represent the entire
///   history of a repository with a single commit object at its root.
/// - The author and committer fields contain the name, email address, timestamp and timezone.
/// - The message field contains the commit message, which may also include a signature block or a
///   DCO sign-off.
///
/// Equality between two commits is decided by `id` alone (see the `PartialEq` impl).
#[derive(
    Eq,
    Debug,
    Clone,
    serde::Serialize,
    serde::Deserialize,
    rkyv::Archive,
    rkyv::Serialize,
    rkyv::Deserialize,
)]
pub struct Commit {
    /// Object hash identifying this commit.
    pub id: ObjectHash,
    /// Hash of the top-level tree this commit points to.
    pub tree_id: ObjectHash,
    /// Hashes of the parent commits (more than one for a merge commit).
    pub parent_commit_ids: Vec<ObjectHash>,
    /// Signature of the author.
    pub author: Signature,
    /// Signature of the committer.
    pub committer: Signature,
    /// Everything that follows the committer line in the serialized object.
    pub message: String,
}
52impl PartialEq for Commit {
53    fn eq(&self, other: &Self) -> bool {
54        self.id == other.id
55    }
56}
57
58impl Display for Commit {
59    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
60        writeln!(f, "tree: {}", self.tree_id)?;
61        for parent in self.parent_commit_ids.iter() {
62            writeln!(f, "parent: {parent}")?;
63        }
64        writeln!(f, "author {}", self.author)?;
65        writeln!(f, "committer {}", self.committer)?;
66        writeln!(f, "{}", self.message)
67    }
68}
69
70impl Commit {
71    pub fn new(
72        author: Signature,
73        committer: Signature,
74        tree_id: ObjectHash,
75        parent_commit_ids: Vec<ObjectHash>,
76        message: &str,
77    ) -> Commit {
78        let mut commit = Commit {
79            id: ObjectHash::default(),
80            tree_id,
81            parent_commit_ids,
82            author,
83            committer,
84            message: message.to_string(),
85        };
86        // Calculate the hash of the commit object
87        // The hash is calculated from the type and data of the commit object
88        let hash = ObjectHash::from_type_and_data(ObjectType::Commit, &commit.to_data().unwrap());
89        commit.id = hash;
90        commit
91    }
92
93    /// Creates a new commit object from a tree ID and a list of parent commit IDs.
94    /// This function generates the author and committer signatures using the current time
95    /// and a fixed email address.
96    /// It also sets the commit message to the provided string.
97    /// # Arguments
98    /// - `tree_id`: The SHA1/ SHA-256 hash of the tree object that this commit points to.
99    /// - `parent_commit_ids`: A vector of SHA1/ SHA-256 hashes of the parent commits.
100    /// - `message`: A string containing the commit message.
101    /// # Returns
102    /// A new `Commit` object with the specified tree ID, parent commit IDs, and commit message.
103    /// The author and committer signatures are generated using the current time and a fixed email address.
104    pub fn from_tree_id(
105        tree_id: ObjectHash,
106        parent_commit_ids: Vec<ObjectHash>,
107        message: &str,
108    ) -> Commit {
109        let author = Signature::from_data(
110            format!(
111                "author mega <admin@mega.org> {} +0800",
112                chrono::Utc::now().timestamp()
113            )
114            .to_string()
115            .into_bytes(),
116        )
117        .unwrap();
118        let committer = Signature::from_data(
119            format!(
120                "committer mega <admin@mega.org> {} +0800",
121                chrono::Utc::now().timestamp()
122            )
123            .to_string()
124            .into_bytes(),
125        )
126        .unwrap();
127        Commit::new(author, committer, tree_id, parent_commit_ids, message)
128    }
129
130    /// Formats the commit message by extracting the first meaningful line.
131    ///
132    /// If the message contains a PGP signature, it returns the first non-empty line
133    /// after the signature block. Otherwise, it returns the first non-empty line
134    /// in the message. If no such line exists, it returns the original message.
135    pub fn format_message(&self) -> String {
136        let mut lines = self.message.lines();
137
138        // If a PGP signature is present, skip lines until after the signature ends
139        if let Some(pos) = self
140            .message
141            .lines()
142            .position(|line| line.contains("-----END PGP SIGNATURE-----"))
143        {
144            return self
145                .message
146                .lines()
147                .skip(pos + 1)
148                .find(|line| !line.trim().is_empty())
149                .map(|line| line.to_owned())
150                .unwrap_or_else(|| self.message.clone());
151        }
152
153        // Return the first non-empty line from the start
154        lines
155            .find(|line| !line.trim().is_empty())
156            .map(|line| line.to_owned())
157            .unwrap_or_else(|| self.message.clone())
158    }
159}
160
161impl ObjectTrait for Commit {
162    fn from_bytes(data: &[u8], hash: ObjectHash) -> Result<Self, GitError>
163    where
164        Self: Sized,
165    {
166        let mut commit = data;
167        // Find the tree id and remove it from the data
168        let tree_end = commit.find_byte(0x0a).unwrap();
169        let tree_id: ObjectHash = ObjectHash::from_str(
170            String::from_utf8(commit[5..tree_end].to_owned()) // 5 is the length of "tree "
171                .unwrap()
172                .as_str(),
173        )
174        .unwrap();
175        let binding = commit[tree_end + 1..].to_vec(); // Move past the tree id
176        commit = &binding;
177
178        // Find the parent commit ids and remove them from the data
179        let author_begin = commit.find("author").unwrap();
180        // Find all parent commit ids
181        // The parent commit ids are all the lines that start with "parent "
182        // We can use find_iter to find all occurrences of "parent "
183        // and then extract the SHA1/ SHA-256 hashes from them.
184        let parent_commit_ids: Vec<ObjectHash> = commit[..author_begin]
185            .find_iter("parent")
186            .map(|parent| {
187                let parent_end = commit[parent..].find_byte(0x0a).unwrap();
188                ObjectHash::from_str(
189                    // 7 is the length of "parent "
190                    String::from_utf8(commit[parent + 7..parent + parent_end].to_owned())
191                        .unwrap()
192                        .as_str(),
193                )
194                .unwrap()
195            })
196            .collect();
197        let binding = commit[author_begin..].to_vec();
198        commit = &binding;
199
200        // Find the author and committer and remove them from the data
201        // 0x0a is the newline character
202        let author =
203            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
204
205        let binding = commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec();
206        commit = &binding;
207        let committer =
208            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
209
210        // The rest is the message
211        let message = unsafe {
212            String::from_utf8_unchecked(commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec())
213        };
214        Ok(Commit {
215            id: hash,
216            tree_id,
217            parent_commit_ids,
218            author,
219            committer,
220            message,
221        })
222    }
223
    /// Returns the object type of this object, which is always `ObjectType::Commit`.
    fn get_type(&self) -> ObjectType {
        ObjectType::Commit
    }
227
228    fn get_size(&self) -> usize {
229        self.to_data().map(|data| data.len()).unwrap_or(0)
230    }
231
232    /// [Git-Internals-Git-Objects](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects)
233    fn to_data(&self) -> Result<Vec<u8>, GitError> {
234        let mut data = Vec::new();
235
236        data.extend(b"tree ");
237        data.extend(self.tree_id.to_string().as_bytes());
238        data.extend(&[0x0a]);
239
240        for parent_tree_id in &self.parent_commit_ids {
241            data.extend(b"parent ");
242            data.extend(parent_tree_id.to_string().as_bytes());
243            data.extend(&[0x0a]);
244        }
245
246        data.extend(self.author.to_data()?);
247        data.extend(&[0x0a]);
248        data.extend(self.committer.to_data()?);
249        data.extend(&[0x0a]);
250        // Important! or Git Server can't parse & reply: unpack-objects abnormal exit
251        // We can move [0x0a] to message instead here.
252        // data.extend(&[0x0a]);
253        data.extend(self.message.as_bytes());
254
255        Ok(data)
256    }
257}
258
259#[cfg(test)]
260mod tests {
261    use std::str::FromStr;
262
263    use super::*;
264    use crate::hash::{HashKind, set_hash_kind_for_test};
265
    /// Create a basic commit object for testing.
    ///
    /// The fixture is a commit with no parent lines that carries a `gpgsig` header. Because
    /// `from_bytes` keeps everything after the committer line in `message`, the signature block
    /// ends up inside the parsed message.
    fn basic_commit() -> Commit {
        // NOTE(review): presumably selects SHA-1 as the active hash kind while the guard is
        // alive — confirm against `set_hash_kind_for_test`.
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let raw_commit = br#"tree 341e54913a3a43069f2927cc0f703e5a9f730df1
author benjamin.747 <benjamin.747@outlook.com> 1757467768 +0800
committer benjamin.747 <benjamin.747@outlook.com> 1757491219 +0800
gpgsig -----BEGIN PGP SIGNATURE-----

 iQJNBAABCAA3FiEEs4MaYUV7JcjxsVMPyqxGczTZ6K4FAmjBMC4ZHGJlbmphbWlu
 Ljc0N0BvdXRsb29rLmNvbQAKCRDKrEZzNNnorj73EADNpsyLAHsB3NgoeH+uy9Vq
 G2+LRtlvqv3QMK7vbQUadXHlQYWk25SIk+WJ1kG1AnUy5fqOrLSDTA1ny+qwpH8O
 +2sKCF/S1wlzqGWjCcRH5/ir9srsGIn9HbNqBjmU22NJ6Dt2jnqoUvtWfPwyqwWg
 VpjYlj390cFdXTpH5hMvtlmUQB+zCSKtWQW2Ur64h/UsGtllARlACi+KHQQmA2/p
 FLWNddvfJQpPM597DkGohQTD68g0PqOBhUkOHduHq7VHy68DVW+07bPNXK8JhJ8S
 4dyV1sZwcVcov0GcKl0wUbEqzy4gf+zV7DQhkfrSRQMBdo5vCWahYj1AbgaTiu8a
 hscshYDuWWqpxBU/+nCxOPskV29uUG1sRyXp3DqmKJZpnO9CVdw3QaVrqnMEeh2S
 t/wYRI9aI1A+Mi/DETom5ifTVygMkK+3m1h7pAMOlblFEdZx2sDXPRG2IEUcatr4
 Jb2+7PUJQXxUQnwHC7xHHxRh6a2h8TfEJfSoEyrgzxZ0CRxJ6XMJaJu0UwZ2xMsx
 Lgmeu6miB/imwxz5R5RL2yVHbgllSlO5l12AIeBaPoarKXYPSALigQnKCXu5OM3x
 Jq5qsSGtxdr6S1VgLyYHR4o69bQjzBp9K47J3IXqvrpo/ZiO/6Mspk2ZRWhGj82q
 e3qERPp5b7+hA+M7jKPyJg==
 =UeLf
 -----END PGP SIGNATURE-----

test parse commit from bytes
"#;

        // `from_bytes` records this id as given; it is not recomputed from the fixture bytes.
        let hash = ObjectHash::from_str("57d7685c60213a9da465cf900f31933be3a7ee39").unwrap();
        Commit::from_bytes(raw_commit, hash).unwrap()
    }
296
    /// Create a basic commit object with SHA-256 for testing.
    ///
    /// The fixture is a commit with one parent line and a `gpgsig-sha256` header, and it has no
    /// trailing newline after the message. Because `from_bytes` keeps everything after the
    /// committer line in `message`, the signature block ends up inside the parsed message.
    fn basic_commit_sha256() -> Commit {
        // NOTE(review): presumably selects SHA-256 as the active hash kind while the guard is
        // alive — confirm against `set_hash_kind_for_test`.
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let raw_commit = br#"tree 0250024cf99636335fff1070e4220c5d8f67cb8633572d54b304629ad5382760
parent 33324c6819589e8eed81d6c72f216469151a0f2dbe7f42ba021d8b63049eb754
author jackieismpc <jackieismpc@gmail.com> 1764061895 +0800
committer jackieismpc <jackieismpc@gmail.com> 1764061895 +0800
gpgsig-sha256 -----BEGIN PGP SIGNATURE-----

 iQIzBAABCAAdFiEEzW/BI6wDXimDk/4lItD7G/h4TUsFAmklcscACgkQItD7G/h4
 TUtKFRAAtJq9tdl9XdND1ef2dXVQYCkQQlSdNHe2AR/QRVOPI39ZjD5aajRmZoE2
 rKDenNML1ruiGEm+K3ntRDjus+3QF5Xkhj1D6eImQt6RXyOlo64I+GLRKlzw80Sl
 hrd+l1eeuS4n46Z0U9fo1Qgc/crSn2VhUtLHJjvRntJoOb1vNreI2Y42Zmal3oVT
 fQNQ7mqzh3KuWoa8T6nVrLaLH1vl9qhRgkPcIRbFf+ECbB96qykHqcbdHuneSgfx
 +REpr1cedilkQlX81JrQ8Ntf4QFUPPHALl27/G6oPLT714cflEbvcFw7rNR+ktcD
 ZJIMu5Cl7X3/v5e0od/hF9uPfiLHckUsOXiMFLfqRdZx/5XeQFWRpq4eYcW7e89e
 3wJoBA2lCk8SHTBfsprKMpAweXJF9FCjRT5f9Zse2grqH81aQeNJnpSOoCq86oc/
 nxhi8+rbIbClLCGQoGF7sE/fvmKqcex++JnXHcHTtK002Gnh3oHX07sbahlcGuYY
 kg4QhXiLTQ5GfXnEnTPdFqbOVG02vEEsNeRgkmOz4c8Pm1FTDyOkuXd/Igvy7A9R
 MZwQcJ6E4MnsMnoH8FKswGqCD7ftwtJtRzryORBVzvPKALufIXDVLyBbae9dxdej
 bcpUK1bGtDljlwNtbLIOu+F1y2OVh7Tn3zxaQLcEhbUe2tP6rGk=
 =nJMO
 -----END PGP SIGNATURE-----

signed sha256 commit for test"#;
        // `from_bytes` records this id as given; it is not recomputed from the fixture bytes.
        let hash = ObjectHash::from_str(
            "ed43b50437e260a4d8fedacbaa38bad28b54cc424925e4180d9f186afaa0508c",
        )
        .unwrap();
        Commit::from_bytes(raw_commit.as_bytes(), hash).unwrap()
    }
328
329    /// Test creating a Commit from bytes with PGP signature
330    #[test]
331    fn test_from_bytes_with_gpgsig() {
332        let commit = basic_commit();
333
334        assert_eq!(
335            commit.id,
336            ObjectHash::from_str("57d7685c60213a9da465cf900f31933be3a7ee39").unwrap()
337        );
338
339        assert_eq!(
340            commit.tree_id,
341            ObjectHash::from_str("341e54913a3a43069f2927cc0f703e5a9f730df1").unwrap()
342        );
343
344        assert_eq!(commit.author.name, "benjamin.747");
345        assert_eq!(commit.author.email, "benjamin.747@outlook.com");
346
347        assert_eq!(commit.committer.name, "benjamin.747");
348
349        // check message content(must contains gpgsig and content)
350        assert!(commit.message.contains("-----BEGIN PGP SIGNATURE-----"));
351        assert!(commit.message.contains("-----END PGP SIGNATURE-----"));
352        assert!(commit.message.contains("test parse commit from bytes"));
353    }
354
355    /// Test creating a Commit from bytes with SHA-256
356    #[test]
357    fn test_from_bytes_with_gpgsig_sha256() {
358        let commit = basic_commit_sha256();
359        assert_eq!(
360            commit.id,
361            ObjectHash::from_str(
362                "ed43b50437e260a4d8fedacbaa38bad28b54cc424925e4180d9f186afaa0508c"
363            )
364            .unwrap()
365        );
366        assert_eq!(
367            commit.tree_id,
368            ObjectHash::from_str(
369                "0250024cf99636335fff1070e4220c5d8f67cb8633572d54b304629ad5382760"
370            )
371            .unwrap()
372        );
373        assert_eq!(commit.author.name, "jackieismpc");
374        assert_eq!(commit.author.email, "jackieismpc@gmail.com");
375        assert_eq!(commit.committer.name, "jackieismpc");
376        // // check message content (must contain gpgsig-sha256 and content)
377        assert!(commit.message.contains("-----BEGIN PGP SIGNATURE-----"));
378        assert!(commit.message.contains("-----END PGP SIGNATURE-----"));
379        assert!(commit.message.contains("signed sha256 commit for test"));
380    }
381
382    /// Test formatting commit message with PGP signature
383    #[test]
384    fn test_format_message_with_pgp_signature() {
385        let _guard = set_hash_kind_for_test(HashKind::Sha1);
386        let commit = basic_commit();
387        assert_eq!(commit.format_message(), "test parse commit from bytes");
388    }
389
390    /// Test formatting commit message with SHA-256 PGP signature
391    #[test]
392    fn test_format_message_with_pgp_signature_sha256() {
393        let _guard = set_hash_kind_for_test(HashKind::Sha256);
394        let commit = basic_commit_sha256();
395        assert_eq!(commit.format_message(), "signed sha256 commit for test");
396    }
397}