git_internal/internal/object/
commit.rs

1//! The Commit object is a data structure used to represent a specific version of a project's
2//! files at a particular point in time. In Git, the commit object is a fundamental data structure
3//! that is used to track changes to a repository's files over time. Whenever a developer makes
4//! changes to the files in a repository, they create a new commit object that records those changes.
5//!
6//! Each commit object in Git contains the following information:
7//!
//! - A unique SHA-1/SHA-256 hash that identifies the commit.
//! - The author and committer of the commit (which may be different people).
//! - The date and time the commit was made.
//! - A commit message that describes the changes made in the commit.
//! - A reference to the parent commit or commits (in the case of a merge commit) that the new commit is based on.
//! - A reference to the top-level tree object, which records the contents of the files in the repository at the time the commit was made.
14use std::fmt::Display;
15use std::str::FromStr;
16
17use crate::errors::GitError;
18use crate::hash::ObjectHash;
19use crate::internal::object::ObjectTrait;
20use crate::internal::object::ObjectType;
21use crate::internal::object::signature::Signature;
22use bincode::{Decode, Encode};
23use bstr::ByteSlice;
24use serde::Deserialize;
25use serde::Serialize;
26
27/// The `Commit` struct is used to represent a commit object.
28///
29/// - The tree object SHA points to the top level tree for this commit, which reflects the complete
30///   state of the repository at the time of the commit. The tree object in turn points to blobs and
31///   subtrees which represent the files in the repository.
32/// - The parent commit SHAs allow Git to construct a linked list of commits and build the full
33///   commit history. By chaining together commits in this fashion, Git is able to represent the entire
34///   history of a repository with a single commit object at its root.
35/// - The author and committer fields contain the name, email address, timestamp and timezone.
36/// - The message field contains the commit message, which maybe include signed or DCO.
37#[derive(Eq, Debug, Clone, Serialize, Deserialize, Decode, Encode)]
38pub struct Commit {
39    pub id: ObjectHash,
40    pub tree_id: ObjectHash,
41    pub parent_commit_ids: Vec<ObjectHash>,
42    pub author: Signature,
43    pub committer: Signature,
44    pub message: String,
45}
46impl PartialEq for Commit {
47    fn eq(&self, other: &Self) -> bool {
48        self.id == other.id
49    }
50}
51
52impl Display for Commit {
53    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
54        writeln!(f, "tree: {}", self.tree_id)?;
55        for parent in self.parent_commit_ids.iter() {
56            writeln!(f, "parent: {parent}")?;
57        }
58        writeln!(f, "author {}", self.author)?;
59        writeln!(f, "committer {}", self.committer)?;
60        writeln!(f, "{}", self.message)
61    }
62}
63
64impl Commit {
65    pub fn new(
66        author: Signature,
67        committer: Signature,
68        tree_id: ObjectHash,
69        parent_commit_ids: Vec<ObjectHash>,
70        message: &str,
71    ) -> Commit {
72        let mut commit = Commit {
73            id: ObjectHash::default(),
74            tree_id,
75            parent_commit_ids,
76            author,
77            committer,
78            message: message.to_string(),
79        };
80        // Calculate the hash of the commit object
81        // The hash is calculated from the type and data of the commit object
82        let hash = ObjectHash::from_type_and_data(ObjectType::Commit, &commit.to_data().unwrap());
83        commit.id = hash;
84        commit
85    }
86
87    /// Creates a new commit object from a tree ID and a list of parent commit IDs.
88    /// This function generates the author and committer signatures using the current time
89    /// and a fixed email address.
90    /// It also sets the commit message to the provided string.
91    /// # Arguments
92    /// - `tree_id`: The SHA1/ SHA-256 hash of the tree object that this commit points to.
93    /// - `parent_commit_ids`: A vector of SHA1/ SHA-256 hashes of the parent commits.
94    /// - `message`: A string containing the commit message.
95    /// # Returns
96    /// A new `Commit` object with the specified tree ID, parent commit IDs, and commit message.
97    /// The author and committer signatures are generated using the current time and a fixed email address.
98    pub fn from_tree_id(
99        tree_id: ObjectHash,
100        parent_commit_ids: Vec<ObjectHash>,
101        message: &str,
102    ) -> Commit {
103        let author = Signature::from_data(
104            format!(
105                "author mega <admin@mega.org> {} +0800",
106                chrono::Utc::now().timestamp()
107            )
108            .to_string()
109            .into_bytes(),
110        )
111        .unwrap();
112        let committer = Signature::from_data(
113            format!(
114                "committer mega <admin@mega.org> {} +0800",
115                chrono::Utc::now().timestamp()
116            )
117            .to_string()
118            .into_bytes(),
119        )
120        .unwrap();
121        Commit::new(author, committer, tree_id, parent_commit_ids, message)
122    }
123
124    /// Formats the commit message by extracting the first meaningful line.
125    ///
126    /// If the message contains a PGP signature, it returns the first non-empty line
127    /// after the signature block. Otherwise, it returns the first non-empty line
128    /// in the message. If no such line exists, it returns the original message.
129    pub fn format_message(&self) -> String {
130        let mut lines = self.message.lines();
131
132        // If a PGP signature is present, skip lines until after the signature ends
133        if let Some(pos) = self
134            .message
135            .lines()
136            .position(|line| line.contains("-----END PGP SIGNATURE-----"))
137        {
138            return self
139                .message
140                .lines()
141                .skip(pos + 1)
142                .find(|line| !line.trim().is_empty())
143                .map(|line| line.to_owned())
144                .unwrap_or_else(|| self.message.clone());
145        }
146
147        // Return the first non-empty line from the start
148        lines
149            .find(|line| !line.trim().is_empty())
150            .map(|line| line.to_owned())
151            .unwrap_or_else(|| self.message.clone())
152    }
153}
154
155impl ObjectTrait for Commit {
156    fn from_bytes(data: &[u8], hash: ObjectHash) -> Result<Self, GitError>
157    where
158        Self: Sized,
159    {
160        let mut commit = data;
161        // Find the tree id and remove it from the data
162        let tree_end = commit.find_byte(0x0a).unwrap();
163        let tree_id: ObjectHash = ObjectHash::from_str(
164            String::from_utf8(commit[5..tree_end].to_owned()) // 5 is the length of "tree "
165                .unwrap()
166                .as_str(),
167        )
168        .unwrap();
169        let binding = commit[tree_end + 1..].to_vec(); // Move past the tree id
170        commit = &binding;
171
172        // Find the parent commit ids and remove them from the data
173        let author_begin = commit.find("author").unwrap();
174        // Find all parent commit ids
175        // The parent commit ids are all the lines that start with "parent "
176        // We can use find_iter to find all occurrences of "parent "
177        // and then extract the SHA1/ SHA-256 hashes from them.
178        let parent_commit_ids: Vec<ObjectHash> = commit[..author_begin]
179            .find_iter("parent")
180            .map(|parent| {
181                let parent_end = commit[parent..].find_byte(0x0a).unwrap();
182                ObjectHash::from_str(
183                    // 7 is the length of "parent "
184                    String::from_utf8(commit[parent + 7..parent + parent_end].to_owned())
185                        .unwrap()
186                        .as_str(),
187                )
188                .unwrap()
189            })
190            .collect();
191        let binding = commit[author_begin..].to_vec();
192        commit = &binding;
193
194        // Find the author and committer and remove them from the data
195        // 0x0a is the newline character
196        let author =
197            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
198
199        let binding = commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec();
200        commit = &binding;
201        let committer =
202            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
203
204        // The rest is the message
205        let message = unsafe {
206            String::from_utf8_unchecked(commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec())
207        };
208        Ok(Commit {
209            id: hash,
210            tree_id,
211            parent_commit_ids,
212            author,
213            committer,
214            message,
215        })
216    }
217
218    fn get_type(&self) -> ObjectType {
219        ObjectType::Commit
220    }
221
222    fn get_size(&self) -> usize {
223        0
224    }
225
226    /// [Git-Internals-Git-Objects](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects)
227    fn to_data(&self) -> Result<Vec<u8>, GitError> {
228        let mut data = Vec::new();
229
230        data.extend(b"tree ");
231        data.extend(self.tree_id.to_string().as_bytes());
232        data.extend(&[0x0a]);
233
234        for parent_tree_id in &self.parent_commit_ids {
235            data.extend(b"parent ");
236            data.extend(parent_tree_id.to_string().as_bytes());
237            data.extend(&[0x0a]);
238        }
239
240        data.extend(self.author.to_data()?);
241        data.extend(&[0x0a]);
242        data.extend(self.committer.to_data()?);
243        data.extend(&[0x0a]);
244        // Important! or Git Server can't parse & reply: unpack-objects abnormal exit
245        // We can move [0x0a] to message instead here.
246        // data.extend(&[0x0a]);
247        data.extend(self.message.as_bytes());
248
249        Ok(data)
250    }
251}
252
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash::{HashKind, set_hash_kind_for_test};
    use std::str::FromStr;

    /// Object id and tree id of the SHA-1 fixture commit.
    const SHA1_COMMIT_ID: &str = "57d7685c60213a9da465cf900f31933be3a7ee39";
    const SHA1_TREE_ID: &str = "341e54913a3a43069f2927cc0f703e5a9f730df1";

    /// Object id and tree id of the SHA-256 fixture commit.
    const SHA256_COMMIT_ID: &str =
        "ed43b50437e260a4d8fedacbaa38bad28b54cc424925e4180d9f186afaa0508c";
    const SHA256_TREE_ID: &str =
        "0250024cf99636335fff1070e4220c5d8f67cb8633572d54b304629ad5382760";

    /// Raw content of a GPG-signed SHA-1 commit without parents.
    const SHA1_RAW_COMMIT: &[u8] = br#"tree 341e54913a3a43069f2927cc0f703e5a9f730df1
author benjamin.747 <benjamin.747@outlook.com> 1757467768 +0800
committer benjamin.747 <benjamin.747@outlook.com> 1757491219 +0800
gpgsig -----BEGIN PGP SIGNATURE-----

 iQJNBAABCAA3FiEEs4MaYUV7JcjxsVMPyqxGczTZ6K4FAmjBMC4ZHGJlbmphbWlu
 Ljc0N0BvdXRsb29rLmNvbQAKCRDKrEZzNNnorj73EADNpsyLAHsB3NgoeH+uy9Vq
 G2+LRtlvqv3QMK7vbQUadXHlQYWk25SIk+WJ1kG1AnUy5fqOrLSDTA1ny+qwpH8O
 +2sKCF/S1wlzqGWjCcRH5/ir9srsGIn9HbNqBjmU22NJ6Dt2jnqoUvtWfPwyqwWg
 VpjYlj390cFdXTpH5hMvtlmUQB+zCSKtWQW2Ur64h/UsGtllARlACi+KHQQmA2/p
 FLWNddvfJQpPM597DkGohQTD68g0PqOBhUkOHduHq7VHy68DVW+07bPNXK8JhJ8S
 4dyV1sZwcVcov0GcKl0wUbEqzy4gf+zV7DQhkfrSRQMBdo5vCWahYj1AbgaTiu8a
 hscshYDuWWqpxBU/+nCxOPskV29uUG1sRyXp3DqmKJZpnO9CVdw3QaVrqnMEeh2S
 t/wYRI9aI1A+Mi/DETom5ifTVygMkK+3m1h7pAMOlblFEdZx2sDXPRG2IEUcatr4
 Jb2+7PUJQXxUQnwHC7xHHxRh6a2h8TfEJfSoEyrgzxZ0CRxJ6XMJaJu0UwZ2xMsx
 Lgmeu6miB/imwxz5R5RL2yVHbgllSlO5l12AIeBaPoarKXYPSALigQnKCXu5OM3x
 Jq5qsSGtxdr6S1VgLyYHR4o69bQjzBp9K47J3IXqvrpo/ZiO/6Mspk2ZRWhGj82q
 e3qERPp5b7+hA+M7jKPyJg==
 =UeLf
 -----END PGP SIGNATURE-----

test parse commit from bytes
"#;

    /// Raw content of a GPG-signed SHA-256 commit with a single parent.
    const SHA256_RAW_COMMIT: &[u8] = br#"tree 0250024cf99636335fff1070e4220c5d8f67cb8633572d54b304629ad5382760
parent 33324c6819589e8eed81d6c72f216469151a0f2dbe7f42ba021d8b63049eb754
author jackieismpc <jackieismpc@gmail.com> 1764061895 +0800
committer jackieismpc <jackieismpc@gmail.com> 1764061895 +0800
gpgsig-sha256 -----BEGIN PGP SIGNATURE-----

 iQIzBAABCAAdFiEEzW/BI6wDXimDk/4lItD7G/h4TUsFAmklcscACgkQItD7G/h4
 TUtKFRAAtJq9tdl9XdND1ef2dXVQYCkQQlSdNHe2AR/QRVOPI39ZjD5aajRmZoE2
 rKDenNML1ruiGEm+K3ntRDjus+3QF5Xkhj1D6eImQt6RXyOlo64I+GLRKlzw80Sl
 hrd+l1eeuS4n46Z0U9fo1Qgc/crSn2VhUtLHJjvRntJoOb1vNreI2Y42Zmal3oVT
 fQNQ7mqzh3KuWoa8T6nVrLaLH1vl9qhRgkPcIRbFf+ECbB96qykHqcbdHuneSgfx
 +REpr1cedilkQlX81JrQ8Ntf4QFUPPHALl27/G6oPLT714cflEbvcFw7rNR+ktcD
 ZJIMu5Cl7X3/v5e0od/hF9uPfiLHckUsOXiMFLfqRdZx/5XeQFWRpq4eYcW7e89e
 3wJoBA2lCk8SHTBfsprKMpAweXJF9FCjRT5f9Zse2grqH81aQeNJnpSOoCq86oc/
 nxhi8+rbIbClLCGQoGF7sE/fvmKqcex++JnXHcHTtK002Gnh3oHX07sbahlcGuYY
 kg4QhXiLTQ5GfXnEnTPdFqbOVG02vEEsNeRgkmOz4c8Pm1FTDyOkuXd/Igvy7A9R
 MZwQcJ6E4MnsMnoH8FKswGqCD7ftwtJtRzryORBVzvPKALufIXDVLyBbae9dxdej
 bcpUK1bGtDljlwNtbLIOu+F1y2OVh7Tn3zxaQLcEhbUe2tP6rGk=
 =nJMO
 -----END PGP SIGNATURE-----

signed sha256 commit for test"#;

    /// Parses a hex string into an `ObjectHash`, panicking on failure.
    fn parse_hash(hex: &str) -> ObjectHash {
        ObjectHash::from_str(hex).unwrap()
    }

    /// Parses the SHA-1 fixture while the SHA-1 hash kind is active.
    fn basic_commit() -> Commit {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let id = parse_hash(SHA1_COMMIT_ID);
        Commit::from_bytes(SHA1_RAW_COMMIT, id).unwrap()
    }

    /// Parses the SHA-256 fixture while the SHA-256 hash kind is active.
    fn basic_commit_sha256() -> Commit {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let id = parse_hash(SHA256_COMMIT_ID);
        Commit::from_bytes(SHA256_RAW_COMMIT, id).unwrap()
    }

    #[test]
    fn test_from_bytes_with_gpgsig() {
        let parsed = basic_commit();

        assert_eq!(parsed.id, parse_hash(SHA1_COMMIT_ID));
        assert_eq!(parsed.tree_id, parse_hash(SHA1_TREE_ID));

        assert_eq!(parsed.author.name, "benjamin.747");
        assert_eq!(parsed.author.email, "benjamin.747@outlook.com");

        assert_eq!(parsed.committer.name, "benjamin.747");

        // The message must keep both the signature block and the actual text.
        assert!(parsed.message.contains("-----BEGIN PGP SIGNATURE-----"));
        assert!(parsed.message.contains("-----END PGP SIGNATURE-----"));
        assert!(parsed.message.contains("test parse commit from bytes"));
    }

    #[test]
    fn test_from_bytes_with_gpgsig_sha256() {
        let parsed = basic_commit_sha256();

        assert_eq!(parsed.id, parse_hash(SHA256_COMMIT_ID));
        assert_eq!(parsed.tree_id, parse_hash(SHA256_TREE_ID));

        assert_eq!(parsed.author.name, "jackieismpc");
        assert_eq!(parsed.author.email, "jackieismpc@gmail.com");
        assert_eq!(parsed.committer.name, "jackieismpc");

        // The message must keep both the gpgsig-sha256 block and the actual text.
        assert!(parsed.message.contains("-----BEGIN PGP SIGNATURE-----"));
        assert!(parsed.message.contains("-----END PGP SIGNATURE-----"));
        assert!(parsed.message.contains("signed sha256 commit for test"));
    }

    #[test]
    fn test_format_message_with_pgp_signature() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let parsed = basic_commit();
        assert_eq!(parsed.format_message(), "test parse commit from bytes");
    }

    #[test]
    fn test_format_message_with_pgp_signature_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let parsed = basic_commit_sha256();
        assert_eq!(parsed.format_message(), "signed sha256 commit for test");
    }
}