Skip to main content

git_internal/internal/object/
commit.rs

//! The Commit object is a data structure used to represent a specific version of a project's
//! files at a particular point in time. In Git, the commit object is a fundamental data structure
//! that is used to track changes to a repository's files over time. Whenever a developer makes
//! changes to the files in a repository, they create a new commit object that records those changes.
//!
//! Each commit object in Git contains the following information:
//!
//! - A unique SHA-1/SHA-256 hash that identifies the commit.
//! - The author and committer of the commit (which may be different people).
//! - The date and time the commit was made.
//! - A commit message that describes the changes made in the commit.
//! - A reference to the parent commit or commits (in the case of a merge commit) that the new commit is based on.
//! - A reference to the tree object that records the contents of the files in the repository at the time the commit was made.
14use std::{fmt::Display, str::FromStr};
15
16use bincode::{Decode, Encode};
17use bstr::ByteSlice;
18use serde::{Deserialize, Serialize};
19
20use crate::{
21    errors::GitError,
22    hash::ObjectHash,
23    internal::object::{ObjectTrait, ObjectType, signature::Signature},
24};
25
26/// The `Commit` struct is used to represent a commit object.
27///
28/// - The tree object SHA-1/SHA-256 hashpoints to the top level tree for this commit, which reflects the complete
29///   state of the repository at the time of the commit. The tree object in turn points to blobs and
30///   subtrees which represent the files in the repository.
31/// - The parent commit SHAs allow Git to construct a linked list of commits and build the full
32///   commit history. By chaining together commits in this fashion, Git is able to represent the entire
33///   history of a repository with a single commit object at its root.
34/// - The author and committer fields contain the name, email address, timestamp and timezone.
35/// - The message field contains the commit message, which maybe include signed or DCO.
36#[derive(Eq, Debug, Clone, Serialize, Deserialize, Decode, Encode)]
37pub struct Commit {
38    pub id: ObjectHash,
39    pub tree_id: ObjectHash,
40    pub parent_commit_ids: Vec<ObjectHash>,
41    pub author: Signature,
42    pub committer: Signature,
43    pub message: String,
44}
45impl PartialEq for Commit {
46    fn eq(&self, other: &Self) -> bool {
47        self.id == other.id
48    }
49}
50
51impl Display for Commit {
52    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
53        writeln!(f, "tree: {}", self.tree_id)?;
54        for parent in self.parent_commit_ids.iter() {
55            writeln!(f, "parent: {parent}")?;
56        }
57        writeln!(f, "author {}", self.author)?;
58        writeln!(f, "committer {}", self.committer)?;
59        writeln!(f, "{}", self.message)
60    }
61}
62
63impl Commit {
64    pub fn new(
65        author: Signature,
66        committer: Signature,
67        tree_id: ObjectHash,
68        parent_commit_ids: Vec<ObjectHash>,
69        message: &str,
70    ) -> Commit {
71        let mut commit = Commit {
72            id: ObjectHash::default(),
73            tree_id,
74            parent_commit_ids,
75            author,
76            committer,
77            message: message.to_string(),
78        };
79        // Calculate the hash of the commit object
80        // The hash is calculated from the type and data of the commit object
81        let hash = ObjectHash::from_type_and_data(ObjectType::Commit, &commit.to_data().unwrap());
82        commit.id = hash;
83        commit
84    }
85
86    /// Creates a new commit object from a tree ID and a list of parent commit IDs.
87    /// This function generates the author and committer signatures using the current time
88    /// and a fixed email address.
89    /// It also sets the commit message to the provided string.
90    /// # Arguments
91    /// - `tree_id`: The SHA1/ SHA-256 hash of the tree object that this commit points to.
92    /// - `parent_commit_ids`: A vector of SHA1/ SHA-256 hashes of the parent commits.
93    /// - `message`: A string containing the commit message.
94    /// # Returns
95    /// A new `Commit` object with the specified tree ID, parent commit IDs, and commit message.
96    /// The author and committer signatures are generated using the current time and a fixed email address.
97    pub fn from_tree_id(
98        tree_id: ObjectHash,
99        parent_commit_ids: Vec<ObjectHash>,
100        message: &str,
101    ) -> Commit {
102        let author = Signature::from_data(
103            format!(
104                "author mega <admin@mega.org> {} +0800",
105                chrono::Utc::now().timestamp()
106            )
107            .to_string()
108            .into_bytes(),
109        )
110        .unwrap();
111        let committer = Signature::from_data(
112            format!(
113                "committer mega <admin@mega.org> {} +0800",
114                chrono::Utc::now().timestamp()
115            )
116            .to_string()
117            .into_bytes(),
118        )
119        .unwrap();
120        Commit::new(author, committer, tree_id, parent_commit_ids, message)
121    }
122
123    /// Formats the commit message by extracting the first meaningful line.
124    ///
125    /// If the message contains a PGP signature, it returns the first non-empty line
126    /// after the signature block. Otherwise, it returns the first non-empty line
127    /// in the message. If no such line exists, it returns the original message.
128    pub fn format_message(&self) -> String {
129        let mut lines = self.message.lines();
130
131        // If a PGP signature is present, skip lines until after the signature ends
132        if let Some(pos) = self
133            .message
134            .lines()
135            .position(|line| line.contains("-----END PGP SIGNATURE-----"))
136        {
137            return self
138                .message
139                .lines()
140                .skip(pos + 1)
141                .find(|line| !line.trim().is_empty())
142                .map(|line| line.to_owned())
143                .unwrap_or_else(|| self.message.clone());
144        }
145
146        // Return the first non-empty line from the start
147        lines
148            .find(|line| !line.trim().is_empty())
149            .map(|line| line.to_owned())
150            .unwrap_or_else(|| self.message.clone())
151    }
152}
153
154impl ObjectTrait for Commit {
155    fn from_bytes(data: &[u8], hash: ObjectHash) -> Result<Self, GitError>
156    where
157        Self: Sized,
158    {
159        let mut commit = data;
160        // Find the tree id and remove it from the data
161        let tree_end = commit.find_byte(0x0a).unwrap();
162        let tree_id: ObjectHash = ObjectHash::from_str(
163            String::from_utf8(commit[5..tree_end].to_owned()) // 5 is the length of "tree "
164                .unwrap()
165                .as_str(),
166        )
167        .unwrap();
168        let binding = commit[tree_end + 1..].to_vec(); // Move past the tree id
169        commit = &binding;
170
171        // Find the parent commit ids and remove them from the data
172        let author_begin = commit.find("author").unwrap();
173        // Find all parent commit ids
174        // The parent commit ids are all the lines that start with "parent "
175        // We can use find_iter to find all occurrences of "parent "
176        // and then extract the SHA1/ SHA-256 hashes from them.
177        let parent_commit_ids: Vec<ObjectHash> = commit[..author_begin]
178            .find_iter("parent")
179            .map(|parent| {
180                let parent_end = commit[parent..].find_byte(0x0a).unwrap();
181                ObjectHash::from_str(
182                    // 7 is the length of "parent "
183                    String::from_utf8(commit[parent + 7..parent + parent_end].to_owned())
184                        .unwrap()
185                        .as_str(),
186                )
187                .unwrap()
188            })
189            .collect();
190        let binding = commit[author_begin..].to_vec();
191        commit = &binding;
192
193        // Find the author and committer and remove them from the data
194        // 0x0a is the newline character
195        let author =
196            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
197
198        let binding = commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec();
199        commit = &binding;
200        let committer =
201            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
202
203        // The rest is the message
204        let message = unsafe {
205            String::from_utf8_unchecked(commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec())
206        };
207        Ok(Commit {
208            id: hash,
209            tree_id,
210            parent_commit_ids,
211            author,
212            committer,
213            message,
214        })
215    }
216
217    fn get_type(&self) -> ObjectType {
218        ObjectType::Commit
219    }
220
221    fn get_size(&self) -> usize {
222        self.to_data().map(|data| data.len()).unwrap_or(0)
223    }
224
225    /// [Git-Internals-Git-Objects](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects)
226    fn to_data(&self) -> Result<Vec<u8>, GitError> {
227        let mut data = Vec::new();
228
229        data.extend(b"tree ");
230        data.extend(self.tree_id.to_string().as_bytes());
231        data.extend(&[0x0a]);
232
233        for parent_tree_id in &self.parent_commit_ids {
234            data.extend(b"parent ");
235            data.extend(parent_tree_id.to_string().as_bytes());
236            data.extend(&[0x0a]);
237        }
238
239        data.extend(self.author.to_data()?);
240        data.extend(&[0x0a]);
241        data.extend(self.committer.to_data()?);
242        data.extend(&[0x0a]);
243        // Important! or Git Server can't parse & reply: unpack-objects abnormal exit
244        // We can move [0x0a] to message instead here.
245        // data.extend(&[0x0a]);
246        data.extend(self.message.as_bytes());
247
248        Ok(data)
249    }
250}
251
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;
    use crate::hash::{HashKind, set_hash_kind_for_test};

    /// Create a basic SHA-1 commit object (with a `gpgsig` header) for testing
    fn basic_commit() -> Commit {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let raw_commit = br#"tree 341e54913a3a43069f2927cc0f703e5a9f730df1
author benjamin.747 <benjamin.747@outlook.com> 1757467768 +0800
committer benjamin.747 <benjamin.747@outlook.com> 1757491219 +0800
gpgsig -----BEGIN PGP SIGNATURE-----

 iQJNBAABCAA3FiEEs4MaYUV7JcjxsVMPyqxGczTZ6K4FAmjBMC4ZHGJlbmphbWlu
 Ljc0N0BvdXRsb29rLmNvbQAKCRDKrEZzNNnorj73EADNpsyLAHsB3NgoeH+uy9Vq
 G2+LRtlvqv3QMK7vbQUadXHlQYWk25SIk+WJ1kG1AnUy5fqOrLSDTA1ny+qwpH8O
 +2sKCF/S1wlzqGWjCcRH5/ir9srsGIn9HbNqBjmU22NJ6Dt2jnqoUvtWfPwyqwWg
 VpjYlj390cFdXTpH5hMvtlmUQB+zCSKtWQW2Ur64h/UsGtllARlACi+KHQQmA2/p
 FLWNddvfJQpPM597DkGohQTD68g0PqOBhUkOHduHq7VHy68DVW+07bPNXK8JhJ8S
 4dyV1sZwcVcov0GcKl0wUbEqzy4gf+zV7DQhkfrSRQMBdo5vCWahYj1AbgaTiu8a
 hscshYDuWWqpxBU/+nCxOPskV29uUG1sRyXp3DqmKJZpnO9CVdw3QaVrqnMEeh2S
 t/wYRI9aI1A+Mi/DETom5ifTVygMkK+3m1h7pAMOlblFEdZx2sDXPRG2IEUcatr4
 Jb2+7PUJQXxUQnwHC7xHHxRh6a2h8TfEJfSoEyrgzxZ0CRxJ6XMJaJu0UwZ2xMsx
 Lgmeu6miB/imwxz5R5RL2yVHbgllSlO5l12AIeBaPoarKXYPSALigQnKCXu5OM3x
 Jq5qsSGtxdr6S1VgLyYHR4o69bQjzBp9K47J3IXqvrpo/ZiO/6Mspk2ZRWhGj82q
 e3qERPp5b7+hA+M7jKPyJg==
 =UeLf
 -----END PGP SIGNATURE-----

test parse commit from bytes
"#;

        let hash = ObjectHash::from_str("57d7685c60213a9da465cf900f31933be3a7ee39").unwrap();
        Commit::from_bytes(raw_commit, hash).unwrap()
    }

    /// Create a basic commit object with SHA-256 (parent + `gpgsig-sha256` header) for testing
    fn basic_commit_sha256() -> Commit {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let raw_commit = br#"tree 0250024cf99636335fff1070e4220c5d8f67cb8633572d54b304629ad5382760
parent 33324c6819589e8eed81d6c72f216469151a0f2dbe7f42ba021d8b63049eb754
author jackieismpc <jackieismpc@gmail.com> 1764061895 +0800
committer jackieismpc <jackieismpc@gmail.com> 1764061895 +0800
gpgsig-sha256 -----BEGIN PGP SIGNATURE-----

 iQIzBAABCAAdFiEEzW/BI6wDXimDk/4lItD7G/h4TUsFAmklcscACgkQItD7G/h4
 TUtKFRAAtJq9tdl9XdND1ef2dXVQYCkQQlSdNHe2AR/QRVOPI39ZjD5aajRmZoE2
 rKDenNML1ruiGEm+K3ntRDjus+3QF5Xkhj1D6eImQt6RXyOlo64I+GLRKlzw80Sl
 hrd+l1eeuS4n46Z0U9fo1Qgc/crSn2VhUtLHJjvRntJoOb1vNreI2Y42Zmal3oVT
 fQNQ7mqzh3KuWoa8T6nVrLaLH1vl9qhRgkPcIRbFf+ECbB96qykHqcbdHuneSgfx
 +REpr1cedilkQlX81JrQ8Ntf4QFUPPHALl27/G6oPLT714cflEbvcFw7rNR+ktcD
 ZJIMu5Cl7X3/v5e0od/hF9uPfiLHckUsOXiMFLfqRdZx/5XeQFWRpq4eYcW7e89e
 3wJoBA2lCk8SHTBfsprKMpAweXJF9FCjRT5f9Zse2grqH81aQeNJnpSOoCq86oc/
 nxhi8+rbIbClLCGQoGF7sE/fvmKqcex++JnXHcHTtK002Gnh3oHX07sbahlcGuYY
 kg4QhXiLTQ5GfXnEnTPdFqbOVG02vEEsNeRgkmOz4c8Pm1FTDyOkuXd/Igvy7A9R
 MZwQcJ6E4MnsMnoH8FKswGqCD7ftwtJtRzryORBVzvPKALufIXDVLyBbae9dxdej
 bcpUK1bGtDljlwNtbLIOu+F1y2OVh7Tn3zxaQLcEhbUe2tP6rGk=
 =nJMO
 -----END PGP SIGNATURE-----

signed sha256 commit for test"#;
        let hash = ObjectHash::from_str(
            "ed43b50437e260a4d8fedacbaa38bad28b54cc424925e4180d9f186afaa0508c",
        )
        .unwrap();
        Commit::from_bytes(raw_commit, hash).unwrap()
    }

    /// Test creating a Commit from bytes with PGP signature
    #[test]
    fn test_from_bytes_with_gpgsig() {
        // Hold the hash kind for the whole test, like the format_message tests do, so the
        // expected hashes below are built under the same kind as the fixture.
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let commit = basic_commit();

        assert_eq!(
            commit.id,
            ObjectHash::from_str("57d7685c60213a9da465cf900f31933be3a7ee39").unwrap()
        );

        assert_eq!(
            commit.tree_id,
            ObjectHash::from_str("341e54913a3a43069f2927cc0f703e5a9f730df1").unwrap()
        );

        assert_eq!(commit.author.name, "benjamin.747");
        assert_eq!(commit.author.email, "benjamin.747@outlook.com");

        assert_eq!(commit.committer.name, "benjamin.747");

        // Check message content (must contain the gpgsig block and the message text)
        assert!(commit.message.contains("-----BEGIN PGP SIGNATURE-----"));
        assert!(commit.message.contains("-----END PGP SIGNATURE-----"));
        assert!(commit.message.contains("test parse commit from bytes"));
    }

    /// Test creating a Commit from bytes with SHA-256
    #[test]
    fn test_from_bytes_with_gpgsig_sha256() {
        // Hold the hash kind for the whole test, like the format_message tests do, so the
        // expected hashes below are built under the same kind as the fixture.
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let commit = basic_commit_sha256();
        assert_eq!(
            commit.id,
            ObjectHash::from_str(
                "ed43b50437e260a4d8fedacbaa38bad28b54cc424925e4180d9f186afaa0508c"
            )
            .unwrap()
        );
        assert_eq!(
            commit.tree_id,
            ObjectHash::from_str(
                "0250024cf99636335fff1070e4220c5d8f67cb8633572d54b304629ad5382760"
            )
            .unwrap()
        );
        assert_eq!(commit.author.name, "jackieismpc");
        assert_eq!(commit.author.email, "jackieismpc@gmail.com");
        assert_eq!(commit.committer.name, "jackieismpc");
        // Check message content (must contain the gpgsig-sha256 block and the message text)
        assert!(commit.message.contains("-----BEGIN PGP SIGNATURE-----"));
        assert!(commit.message.contains("-----END PGP SIGNATURE-----"));
        assert!(commit.message.contains("signed sha256 commit for test"));
    }

    /// Test formatting commit message with PGP signature
    #[test]
    fn test_format_message_with_pgp_signature() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let commit = basic_commit();
        assert_eq!(commit.format_message(), "test parse commit from bytes");
    }

    /// Test formatting commit message with SHA-256 PGP signature
    #[test]
    fn test_format_message_with_pgp_signature_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let commit = basic_commit_sha256();
        assert_eq!(commit.format_message(), "signed sha256 commit for test");
    }
}