git_internal/internal/object/
commit.rs

1//! The Commit object is a data structure used to represent a specific version of a project's
2//! files at a particular point in time. In Git, the commit object is a fundamental data structure
3//! that is used to track changes to a repository's files over time. Whenever a developer makes
4//! changes to the files in a repository, they create a new commit object that records those changes.
5//!
6//! Each commit object in Git contains the following information:
7//!
8//! - A unique SHA-1 hash that identifies the commit.
9//! - The author and committer of the commit (which may be different people).
10//! - The date and time the commit was made.
11//! - A commit message that describes the changes made in the commit.
12//! - A reference to the parent commit or commits (in the case of a merge commit) that the new commit is based on.
//! - A reference to the top-level tree object, which records the contents of the files in the
//!   repository at the time the commit was made.
14use std::fmt::Display;
15use std::str::FromStr;
16
17use crate::errors::GitError;
18use crate::hash::SHA1;
19use crate::internal::object::ObjectTrait;
20use crate::internal::object::ObjectType;
21use crate::internal::object::signature::Signature;
22use bincode::{Decode, Encode};
23use bstr::ByteSlice;
24use serde::Deserialize;
25use serde::Serialize;
26
/// The `Commit` struct is used to represent a commit object.
///
/// - The tree object SHA points to the top level tree for this commit, which reflects the complete
///   state of the repository at the time of the commit. The tree object in turn points to blobs and
///   subtrees which represent the files in the repository.
/// - The parent commit SHAs allow Git to construct a linked list of commits and build the full
///   commit history. By chaining together commits in this fashion, Git is able to represent the entire
///   history of a repository with a single commit object at its root.
/// - The author and committer fields contain the name, email address, timestamp and timezone.
/// - The message field contains the commit message, which may include a signature or a DCO
///   sign-off.
///
/// Note that equality (`PartialEq`) between two commits is decided by `id` alone.
#[derive(Eq, Debug, Clone, Serialize, Deserialize, Decode, Encode)]
pub struct Commit {
    /// Object id of this commit. `Commit::new` computes it from the serialized commit;
    /// `from_bytes` stores whatever hash the caller supplies.
    pub id: SHA1,
    /// Id of the top-level tree this commit points to.
    pub tree_id: SHA1,
    /// Ids of the parent commits, in the order they are written to the object
    /// (empty for a root commit, more than one for a merge).
    pub parent_commit_ids: Vec<SHA1>,
    /// Who authored the change.
    pub author: Signature,
    /// Who created the commit.
    pub committer: Signature,
    /// Everything that follows the committer line, kept verbatim. For commits parsed with
    /// `from_bytes` this includes any extra headers (for example `gpgsig`) and the blank
    /// separator line in front of the human-readable message.
    pub message: String,
}
46impl PartialEq for Commit {
47    fn eq(&self, other: &Self) -> bool {
48        self.id == other.id
49    }
50}
51
52impl Display for Commit {
53    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
54        writeln!(f, "tree: {}", self.tree_id)?;
55        for parent in self.parent_commit_ids.iter() {
56            writeln!(f, "parent: {parent}")?;
57        }
58        writeln!(f, "author {}", self.author)?;
59        writeln!(f, "committer {}", self.committer)?;
60        writeln!(f, "{}", self.message)
61    }
62}
63
64impl Commit {
65    pub fn new(
66        author: Signature,
67        committer: Signature,
68        tree_id: SHA1,
69        parent_commit_ids: Vec<SHA1>,
70        message: &str,
71    ) -> Commit {
72        let mut commit = Commit {
73            id: SHA1::default(),
74            tree_id,
75            parent_commit_ids,
76            author,
77            committer,
78            message: message.to_string(),
79        };
80        // Calculate the hash of the commit object
81        // The hash is calculated from the type and data of the commit object
82        let hash = SHA1::from_type_and_data(ObjectType::Commit, &commit.to_data().unwrap());
83        commit.id = hash;
84        commit
85    }
86
87    /// Creates a new commit object from a tree ID and a list of parent commit IDs.
88    /// This function generates the author and committer signatures using the current time
89    /// and a fixed email address.
90    /// It also sets the commit message to the provided string.
91    /// # Arguments
92    /// - `tree_id`: The SHA1 hash of the tree object that this commit points to.
93    /// - `parent_commit_ids`: A vector of SHA1 hashes of the parent commits.
94    /// - `message`: A string containing the commit message.
95    /// # Returns
96    /// A new `Commit` object with the specified tree ID, parent commit IDs, and commit message.
97    /// The author and committer signatures are generated using the current time and a fixed email address.
98    pub fn from_tree_id(tree_id: SHA1, parent_commit_ids: Vec<SHA1>, message: &str) -> Commit {
99        let author = Signature::from_data(
100            format!(
101                "author mega <admin@mega.org> {} +0800",
102                chrono::Utc::now().timestamp()
103            )
104            .to_string()
105            .into_bytes(),
106        )
107        .unwrap();
108        let committer = Signature::from_data(
109            format!(
110                "committer mega <admin@mega.org> {} +0800",
111                chrono::Utc::now().timestamp()
112            )
113            .to_string()
114            .into_bytes(),
115        )
116        .unwrap();
117        Commit::new(author, committer, tree_id, parent_commit_ids, message)
118    }
119
120    /// Formats the commit message by extracting the first meaningful line.
121    ///
122    /// If the message contains a PGP signature, it returns the first non-empty line
123    /// after the signature block. Otherwise, it returns the first non-empty line
124    /// in the message. If no such line exists, it returns the original message.
125    pub fn format_message(&self) -> String {
126        let mut lines = self.message.lines();
127
128        // If a PGP signature is present, skip lines until after the signature ends
129        if let Some(pos) = self
130            .message
131            .lines()
132            .position(|line| line.contains("-----END PGP SIGNATURE-----"))
133        {
134            return self
135                .message
136                .lines()
137                .skip(pos + 1)
138                .find(|line| !line.trim().is_empty())
139                .map(|line| line.to_owned())
140                .unwrap_or_else(|| self.message.clone());
141        }
142
143        // Return the first non-empty line from the start
144        lines
145            .find(|line| !line.trim().is_empty())
146            .map(|line| line.to_owned())
147            .unwrap_or_else(|| self.message.clone())
148    }
149}
150
151impl ObjectTrait for Commit {
152    fn from_bytes(data: &[u8], hash: SHA1) -> Result<Self, GitError>
153    where
154        Self: Sized,
155    {
156        let mut commit = data;
157        // Find the tree id and remove it from the data
158        let tree_end = commit.find_byte(0x0a).unwrap();
159        let tree_id: SHA1 = SHA1::from_str(
160            String::from_utf8(commit[5..tree_end].to_owned()) // 5 is the length of "tree "
161                .unwrap()
162                .as_str(),
163        )
164        .unwrap();
165        let binding = commit[tree_end + 1..].to_vec(); // Move past the tree id
166        commit = &binding;
167
168        // Find the parent commit ids and remove them from the data
169        let author_begin = commit.find("author").unwrap();
170        // Find all parent commit ids
171        // The parent commit ids are all the lines that start with "parent "
172        // We can use find_iter to find all occurrences of "parent "
173        // and then extract the SHA1 hashes from them.
174        let parent_commit_ids: Vec<SHA1> = commit[..author_begin]
175            .find_iter("parent")
176            .map(|parent| {
177                let parent_end = commit[parent..].find_byte(0x0a).unwrap();
178                SHA1::from_str(
179                    // 7 is the length of "parent "
180                    String::from_utf8(commit[parent + 7..parent + parent_end].to_owned())
181                        .unwrap()
182                        .as_str(),
183                )
184                .unwrap()
185            })
186            .collect();
187        let binding = commit[author_begin..].to_vec();
188        commit = &binding;
189
190        // Find the author and committer and remove them from the data
191        // 0x0a is the newline character
192        let author =
193            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
194
195        let binding = commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec();
196        commit = &binding;
197        let committer =
198            Signature::from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap();
199
200        // The rest is the message
201        let message = unsafe {
202            String::from_utf8_unchecked(commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec())
203        };
204        Ok(Commit {
205            id: hash,
206            tree_id,
207            parent_commit_ids,
208            author,
209            committer,
210            message,
211        })
212    }
213
214    fn get_type(&self) -> ObjectType {
215        ObjectType::Commit
216    }
217
218    fn get_size(&self) -> usize {
219        0
220    }
221
222    /// [Git-Internals-Git-Objects](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects)
223    fn to_data(&self) -> Result<Vec<u8>, GitError> {
224        let mut data = Vec::new();
225
226        data.extend(b"tree ");
227        data.extend(self.tree_id.to_string().as_bytes());
228        data.extend(&[0x0a]);
229
230        for parent_tree_id in &self.parent_commit_ids {
231            data.extend(b"parent ");
232            data.extend(parent_tree_id.to_string().as_bytes());
233            data.extend(&[0x0a]);
234        }
235
236        data.extend(self.author.to_data()?);
237        data.extend(&[0x0a]);
238        data.extend(self.committer.to_data()?);
239        data.extend(&[0x0a]);
240        // Important! or Git Server can't parse & reply: unpack-objects abnormal exit
241        // We can move [0x0a] to message instead here.
242        // data.extend(&[0x0a]);
243        data.extend(self.message.as_bytes());
244
245        Ok(data)
246    }
247}
248
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    /// Object id under which the fixture commit is parsed.
    const FIXTURE_ID: &str = "57d7685c60213a9da465cf900f31933be3a7ee39";

    /// Raw body of a GPG-signed commit without parents.
    const SIGNED_COMMIT: &[u8] = br#"tree 341e54913a3a43069f2927cc0f703e5a9f730df1
author benjamin.747 <benjamin.747@outlook.com> 1757467768 +0800
committer benjamin.747 <benjamin.747@outlook.com> 1757491219 +0800
gpgsig -----BEGIN PGP SIGNATURE-----

 iQJNBAABCAA3FiEEs4MaYUV7JcjxsVMPyqxGczTZ6K4FAmjBMC4ZHGJlbmphbWlu
 Ljc0N0BvdXRsb29rLmNvbQAKCRDKrEZzNNnorj73EADNpsyLAHsB3NgoeH+uy9Vq
 G2+LRtlvqv3QMK7vbQUadXHlQYWk25SIk+WJ1kG1AnUy5fqOrLSDTA1ny+qwpH8O
 +2sKCF/S1wlzqGWjCcRH5/ir9srsGIn9HbNqBjmU22NJ6Dt2jnqoUvtWfPwyqwWg
 VpjYlj390cFdXTpH5hMvtlmUQB+zCSKtWQW2Ur64h/UsGtllARlACi+KHQQmA2/p
 FLWNddvfJQpPM597DkGohQTD68g0PqOBhUkOHduHq7VHy68DVW+07bPNXK8JhJ8S
 4dyV1sZwcVcov0GcKl0wUbEqzy4gf+zV7DQhkfrSRQMBdo5vCWahYj1AbgaTiu8a
 hscshYDuWWqpxBU/+nCxOPskV29uUG1sRyXp3DqmKJZpnO9CVdw3QaVrqnMEeh2S
 t/wYRI9aI1A+Mi/DETom5ifTVygMkK+3m1h7pAMOlblFEdZx2sDXPRG2IEUcatr4
 Jb2+7PUJQXxUQnwHC7xHHxRh6a2h8TfEJfSoEyrgzxZ0CRxJ6XMJaJu0UwZ2xMsx
 Lgmeu6miB/imwxz5R5RL2yVHbgllSlO5l12AIeBaPoarKXYPSALigQnKCXu5OM3x
 Jq5qsSGtxdr6S1VgLyYHR4o69bQjzBp9K47J3IXqvrpo/ZiO/6Mspk2ZRWhGj82q
 e3qERPp5b7+hA+M7jKPyJg==
 =UeLf
 -----END PGP SIGNATURE-----

test parse commit from bytes
"#;

    /// Parses the signed fixture commit under `FIXTURE_ID`.
    fn signed_commit_fixture() -> Commit {
        let id = SHA1::from_str(FIXTURE_ID).unwrap();
        Commit::from_bytes(SIGNED_COMMIT, id).unwrap()
    }

    #[test]
    fn test_from_bytes_with_gpgsig() {
        let parsed = signed_commit_fixture();
        let expected_id = SHA1::from_str("57d7685c60213a9da465cf900f31933be3a7ee39").unwrap();
        let expected_tree = SHA1::from_str("341e54913a3a43069f2927cc0f703e5a9f730df1").unwrap();

        assert_eq!(parsed.id, expected_id);
        assert_eq!(parsed.tree_id, expected_tree);

        assert_eq!(parsed.author.name, "benjamin.747");
        assert_eq!(parsed.author.email, "benjamin.747@outlook.com");

        assert_eq!(parsed.committer.name, "benjamin.747");

        // The stored message must keep both the signature block and the message text.
        let message = &parsed.message;
        assert!(message.contains("-----BEGIN PGP SIGNATURE-----"));
        assert!(message.contains("-----END PGP SIGNATURE-----"));
        assert!(message.contains("test parse commit from bytes"));
    }

    #[test]
    fn test_format_message_with_pgp_signature() {
        let subject = signed_commit_fixture().format_message();
        assert_eq!(subject, "test parse commit from bytes");
    }
}