git_internal/
hash.rs

//! In Git, the SHA-1 hash algorithm is used to generate unique identifiers for Git objects.
//! Each Git object corresponds to a unique SHA-1 hash value, which identifies the object's
//! location in Git's internal object database and in the mega database.
4//!
5
6use std::{fmt::Display, io};
7
8use bincode::{Decode, Encode};
9use colored::Colorize;
10use serde::{Deserialize, Serialize};
11use sha1::Digest;
12
13use crate::internal::object::types::ObjectType;
14
15/// The [`SHA1`] struct, encapsulating a `[u8; 20]` array, is specifically designed to represent Git hash IDs.
16/// In Git's context, these IDs are 40-character hexadecimal strings generated via the SHA-1 algorithm.
17/// Each Git object receives a unique hash ID based on its content, serving as an identifier for its location
18/// within the Git internal database. Utilizing a dedicated struct for these hash IDs enhances code readability and
19/// maintainability by providing a clear, structured format for their manipulation and storage.
20///
21/// ### Change Log
22///
23/// In previous versions of the 'mega' project, `Hash` was used to denote hash values. However, in newer versions,
24/// `SHA1` is employed for this purpose. Future updates plan to extend support to SHA256 and SHA512, or potentially
25/// other hash algorithms. By abstracting the hash model to `Hash`, and using specific imports like `use crate::hash::SHA1`
26/// or `use crate::hash::SHA256`, the codebase maintains a high level of clarity and maintainability. This design choice
27/// allows for easier adaptation to different hash algorithms while keeping the underlying implementation consistent and
28/// understandable. - Nov 26, 2023 (by @genedna)
29///
30#[derive(
31    Clone,
32    Copy,
33    Debug,
34    PartialEq,
35    Eq,
36    Hash,
37    PartialOrd,
38    Ord,
39    Default,
40    Deserialize,
41    Serialize,
42    Encode,
43    Decode,
44)]
45pub struct SHA1(pub [u8; 20]);
46
47/// Display trait for SHA1.
48impl Display for SHA1 {
49    /// Allows [`SHA1::to_string()`] to be used.
50    /// Note: If you want a terminal-friendly colorized output, use [`SHA1::to_color_str()`].
51    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
52        write!(f, "{}", hex::encode(self.0))
53    }
54}
55
56impl AsRef<[u8]> for SHA1 {
57    fn as_ref(&self) -> &[u8] {
58        &self.0
59    }
60}
61/// Implementation of the [`std::str::FromStr`] trait for the [`SHA1`] type.
62///
63/// To effectively use the `from_str` method for converting a string to a `SHA1` object, consider the following:
64///   1. The input string `s` should be a pre-calculated hexadecimal string, exactly 40 characters in length. This string
65///      represents a SHA1 hash and should conform to the standard SHA1 hash format.
66///   2. It is necessary to explicitly import the `FromStr` trait to utilize the `from_str` method. Include the import
67///      statement `use std::str::FromStr;` in your code before invoking the `from_str` function. This import ensures
68///      that the `from_str` method is available for converting strings to `SHA1` objects.
69impl std::str::FromStr for SHA1 {
70    type Err = String;
71
72    fn from_str(s: &str) -> Result<Self, Self::Err> {
73        let mut h = SHA1::default();
74        if s.len() != 40 {
75            return Err("The length of the string is not 40".to_string());
76        }
77        let bytes = hex::decode(s).map_err(|e| e.to_string())?;
78        h.0.copy_from_slice(bytes.as_slice());
79        Ok(h)
80    }
81}
82
83/// Implementation of the `SHA1` struct.
84///
85/// The naming conventions for the methods in this implementation are designed to be intuitive and self-explanatory:
86///
87/// 1. `new` Prefix:
88///    Methods starting with `new` are used for computing an SHA-1 hash from given data, signifying the creation of
89///    a new `SHA1` instance. For example, `pub fn new(data: &Vec<u8>) -> SHA1` takes a byte vector and calculates its SHA-1 hash.
90///
91/// 2. `from` Prefix:
92///    Methods beginning with `from` are intended for creating a `SHA1` instance from an existing, pre-calculated value.
93///    This implies direct derivation of the `SHA1` object from the provided input. For instance, `pub fn from_bytes(bytes: &[u8]) -> SHA1`
94///    constructs a `SHA1` from a 20-byte array representing an SHA-1 hash.
95///
96/// 3. `to` Prefix:
97///    Methods with the `to` prefix are used for outputting the `SHA1` value in various formats. This prefix indicates a transformation or
98///    conversion of the `SHA1` instance into another representation. For example, `pub fn to_string(self) -> String` converts the SHA1
99///    value to a plain hexadecimal string, and `pub fn to_data(self) -> Vec<u8>` converts it into a byte vector. The `to` prefix
100///    thus serves as a clear indicator that the method is exporting or transforming the SHA1 value into a different format.
101///
102/// These method naming conventions (`new`, `from`, `to`) provide clarity and predictability in the API, making it easier for users
103/// to understand the intended use and functionality of each method within the `SHA1` struct.
104impl SHA1 {
105    // The size of the SHA-1 hash value in bytes
106    pub const SIZE: usize = 20;
107
108    /// Calculate the SHA-1 hash of the byte slice, then create a Hash value
109    pub fn new(data: &[u8]) -> SHA1 {
110        let h = sha1::Sha1::digest(data);
111        SHA1::from_bytes(h.as_slice())
112    }
113    /// Create a Hash from the object type and data
114    /// This function is used to create a SHA1 hash from the object type and data.
115    /// It constructs a byte vector that includes the object type, the size of the data,
116    /// and the data itself, and then computes the SHA1 hash of this byte vector.
117    ///  
118    ///  Hash compute <- {Object Type}+{ }+{Object Size(before compress)}+{\x00}+{Object Content(before compress)}
119    pub fn from_type_and_data(object_type: ObjectType, data: &[u8]) -> SHA1 {
120        let mut d: Vec<u8> = Vec::new();
121        d.extend(object_type.to_data().unwrap());
122        d.push(b' ');
123        d.extend(data.len().to_string().as_bytes());
124        d.push(b'\x00');
125        d.extend(data);
126        SHA1::new(&d)
127    }
128
129    /// Create Hash from a byte array, which is a 20-byte array already calculated
130    pub fn from_bytes(bytes: &[u8]) -> SHA1 {
131        let mut h = SHA1::default();
132        h.0.copy_from_slice(bytes);
133        h
134    }
135
136    /// Read the Hash value from the stream
137    /// This function will read exactly 20 bytes from the stream
138    pub fn from_stream(data: &mut impl io::Read) -> io::Result<SHA1> {
139        let mut h = SHA1::default();
140        data.read_exact(&mut h.0)?;
141        Ok(h)
142    }
143
144    /// Export sha1 value to String with the color
145    pub fn to_color_str(self) -> String {
146        self.to_string().red().bold().to_string()
147    }
148
149    /// Export sha1 value to a byte array
150    pub fn to_data(self) -> Vec<u8> {
151        self.0.to_vec()
152    }
153
154    /// [`core::fmt::Display`] is somewhat expensive,
155    /// use this hack to get a string more efficiently
156    pub fn _to_string(&self) -> String {
157        hex::encode(self.0)
158    }
159}
160
#[cfg(test)]
mod tests {

    use std::io::BufReader;
    use std::io::Read;
    use std::io::Seek;
    use std::io::SeekFrom;
    use std::str::FromStr;
    use std::{env, path::PathBuf};

    use crate::hash::SHA1;

    /// Raw bytes of the sample hash shared by the conversion tests.
    const SAMPLE_BYTES: [u8; 20] = [
        0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24,
        0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d,
    ];

    /// Hexadecimal text form of [`SAMPLE_BYTES`].
    const SAMPLE_HEX: &str = "8ab686eafeb1f44702738c8b0f24f2567c36da6d";

    /// Hashing known data must give the known SHA-1 digest.
    #[test]
    fn test_sha1_new() {
        // Example input
        let data = "Hello, world!".as_bytes();

        // Generate SHA1 hash from the input data
        let sha1 = SHA1::new(data);

        // Known SHA1 hash for "Hello, world!"
        let expected_sha1_hash = "943a702d06f34599aee1f8da8ef9f7296031d699";

        assert_eq!(sha1.to_string(), expected_sha1_hash);
    }

    /// The last 20 bytes of the pack file must match the hash in its file name.
    /// Depends on the pack fixture located relative to the parent of the current directory.
    #[test]
    fn test_signature_without_delta() {
        let mut source = PathBuf::from(env::current_dir().unwrap().parent().unwrap());
        source.push("tests/data/packs/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        buffered.seek(SeekFrom::End(-20)).unwrap();
        let mut buffer = vec![0; 20];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = SHA1::from_bytes(buffer.as_ref());
        assert_eq!(
            signature.to_string(),
            "1d0e6c14760c956c173ede71cb28f33d921e232f"
        );
    }

    /// Wrapping 20 raw bytes keeps them unchanged.
    #[test]
    fn test_sha1_from_bytes() {
        let sha1 = SHA1::from_bytes(&SAMPLE_BYTES);

        assert_eq!(sha1.to_string(), SAMPLE_HEX);
    }

    /// Reading from a stream consumes exactly the 20 hash bytes.
    #[test]
    fn test_from_stream() {
        let mut reader = std::io::Cursor::new(SAMPLE_BYTES);
        let sha1 = SHA1::from_stream(&mut reader).unwrap();
        assert_eq!(sha1.to_string(), SAMPLE_HEX);
    }

    /// A stream with fewer than 20 bytes must yield an error instead of a partial hash.
    #[test]
    fn test_from_stream_rejects_short_input() {
        let mut reader = std::io::Cursor::new([0u8; 10]);
        assert!(SHA1::from_stream(&mut reader).is_err());
    }

    /// Parsing valid hex text must succeed; a parse failure fails the test.
    #[test]
    fn test_sha1_from_str() {
        let hash = SHA1::from_str(SAMPLE_HEX).expect("a valid 40-character hex string must parse");

        assert_eq!(hash.0, SAMPLE_BYTES);
        assert_eq!(hash.to_string(), SAMPLE_HEX);
    }

    /// Inputs of the wrong length or with non-hex characters must be rejected.
    #[test]
    fn test_sha1_from_str_rejects_invalid_input() {
        assert_eq!(
            SHA1::from_str("8ab686ea"),
            Err("The length of the string is not 40".to_string())
        );
        assert!(SHA1::from_str("").is_err());
        // Right length, but not hexadecimal.
        assert!(SHA1::from_str(&"z".repeat(40)).is_err());
    }

    /// Both string exports must produce the same hexadecimal text.
    #[test]
    fn test_sha1_to_string() {
        let hash = SHA1::from_bytes(&SAMPLE_BYTES);

        assert_eq!(hash.to_string(), SAMPLE_HEX);
        assert_eq!(hash._to_string(), SAMPLE_HEX);
    }

    /// Exporting to a byte vector returns the 20 raw bytes.
    #[test]
    fn test_sha1_to_data() {
        let hash = SHA1::from_str(SAMPLE_HEX).expect("a valid 40-character hex string must parse");

        assert_eq!(hash.to_data(), SAMPLE_BYTES.to_vec());
    }
}