git_internal/
hash.rs

//! Git identifies every object by a hash of its content. SHA-1 is the algorithm most widely used
//! for this, and this module additionally supports SHA-256. Each Git object corresponds to a
//! unique hash value, which is used to locate the object in the Git internal store and in the
//! mega database.
//!
5
6use std::{cell::RefCell, fmt::Display, hash::Hash, io, str::FromStr};
7
8use crate::internal::object::types::ObjectType;
9use bincode::{Decode, Encode};
10use colored::Colorize;
11use serde::{Deserialize, Serialize};
12use sha1::Digest;
13pub type SHA1 = ObjectHash;
14/// The [`SHA1`] struct, encapsulating a `[u8; 20]` array, is specifically designed to represent Git hash IDs.
15/// In Git's context, these IDs are 40-character hexadecimal strings generated via the SHA-1 algorithm.
16/// Each Git object receives a unique hash ID based on its content, serving as an identifier for its location
17/// within the Git internal database. Utilizing a dedicated struct for these hash IDs enhances code readability and
18/// maintainability by providing a clear, structured format for their manipulation and storage.
19///
20/// The [`HashKind`] enum represents different types of hash algorithms supported in Git,
21#[derive(
22    Clone,
23    Copy,
24    Debug,
25    PartialEq,
26    Eq,
27    Hash,
28    PartialOrd,
29    Ord,
30    Default,
31    Deserialize,
32    Serialize,
33    Encode,
34    Decode,
35)]
36pub enum HashKind {
37    #[default]
38    Sha1,
39    Sha256,
40}
41/// Implementation of methods for the [`HashKind`] enum.
42impl HashKind {
43    pub const fn size(&self) -> usize {
44        match self {
45            HashKind::Sha1 => 20,
46            HashKind::Sha256 => 32,
47            // Add more hash kinds here as needed
48        }
49    }
50    pub const fn hex_len(&self) -> usize {
51        match self {
52            HashKind::Sha1 => 40,
53            HashKind::Sha256 => 64,
54        }
55    }
56    pub const fn as_str(&self) -> &'static str {
57        match self {
58            HashKind::Sha1 => "sha1",
59            HashKind::Sha256 => "sha256",
60        }
61    }
62}
63impl std::fmt::Display for HashKind {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        f.write_str(self.as_str())
66    }
67}
68impl std::str::FromStr for HashKind {
69    type Err = String;
70
71    fn from_str(s: &str) -> Result<Self, Self::Err> {
72        match s.to_ascii_lowercase().as_str() {
73            "sha1" => Ok(HashKind::Sha1),
74            "sha256" => Ok(HashKind::Sha256),
75            _ => Err("Invalid hash kind".to_string()),
76        }
77    }
78}
79
80#[derive(
81    Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize, Encode, Decode,
82)]
83pub enum ObjectHash {
84    Sha1([u8; 20]),
85    Sha256([u8; 32]),
86}
87impl Default for ObjectHash {
88    fn default() -> Self {
89        ObjectHash::Sha1([0u8; 20])
90    }
91}
92impl Display for ObjectHash {
93    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
94        write!(f, "{}", hex::encode(self.as_ref()))
95    }
96}
97impl AsRef<[u8]> for ObjectHash {
98    fn as_ref(&self) -> &[u8] {
99        match self {
100            ObjectHash::Sha1(bytes) => bytes.as_slice(),
101            ObjectHash::Sha256(bytes) => bytes.as_slice(),
102        }
103    }
104}
105/// Implementation of the [`std::str::FromStr`] trait for the [`ObjectHash`] enum.
106/// To effectively use the `from_str` method for converting a string to an `ObjectHash` object, consider the following:
107///   1. The input string `s` should be a pre-calculated hexadecimal string, either 40 characters in length for SHA1 or 64 characters for SHA256.
108///      This string represents a hash and should conform to the standard hash format.
109///   2. It is necessary to explicitly import the `FromStr` trait to utilize the `from_str` method. Include the import
110///      statement `use std::str::FromStr;` in your code before invoking the `from_str` function. This import ensures
111impl FromStr for ObjectHash {
112    type Err = String;
113
114    fn from_str(s: &str) -> Result<Self, Self::Err> {
115        match s.len() {
116            40 => {
117                let mut h = [0u8; 20];
118                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
119                h.copy_from_slice(bytes.as_slice());
120                Ok(ObjectHash::Sha1(h))
121            }
122            64 => {
123                let mut h = [0u8; 32];
124                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
125                h.copy_from_slice(bytes.as_slice());
126                Ok(ObjectHash::Sha256(h))
127            }
128            _ => Err("Invalid hash length".to_string()),
129        }
130    }
131}
132
133/// Implementation of methods for the [`ObjectHash`] enum.
134/// 1. The `kind` method determines the type of hash (SHA1 or SHA256) based on the variant of the `ObjectHash` enum.
135/// 2. The `size` method returns the size of the hash in bytes, utilizing the `kind` method to determine the appropriate size.
136/// 3. The `new` method computes the hash of the provided data using the specified hash kind (SHA1 or SHA256) and returns
137///    an `ObjectHash` instance containing the computed hash.
138/// 4. `from` Prefix:Methods to create an `ObjectHash` from different sources:
139///   - `from_type_and_data`: Constructs an `ObjectHash` from an object type and its associated data.
140///  - `from_bytes`: Creates an `ObjectHash` from a byte slice, ensuring the length matches the expected hash size.
141/// - `from_stream`: Reads bytes from a stream to create an `ObjectHash`, ensuring the correct number of bytes are read based on the hash kind.
142/// 5. `to` Prefix:Methods to convert an `ObjectHash` to different formats:
143///  - `to_color_str`: Converts the hash to a colored string representation for display purposes
144/// - `to_data`: Converts the hash to a byte vector.
145/// - `_to_string`: Converts the hash to a hexadecimal string representation.
146///
147impl ObjectHash {
148    /// returns a zeroed hash value for the given hash kind
149    pub fn zero_str(kind: HashKind) -> String {
150        match kind {
151            HashKind::Sha1 => "0000000000000000000000000000000000000000".to_string(),
152            HashKind::Sha256 => {
153                "0000000000000000000000000000000000000000000000000000000000000000".to_string()
154            }
155        }
156    }
157
158    /// returns the kind of hash
159    pub fn kind(&self) -> HashKind {
160        match self {
161            ObjectHash::Sha1(_) => HashKind::Sha1,
162            ObjectHash::Sha256(_) => HashKind::Sha256,
163        }
164    }
165    /// returns the size of hash in bytes
166    pub fn size(&self) -> usize {
167        self.kind().size()
168    }
169
170    /// Calculates the hash of the given data using the specified hash kind.
171    pub fn new(data: &[u8]) -> ObjectHash {
172        match get_hash_kind() {
173            HashKind::Sha1 => {
174                let h = sha1::Sha1::digest(data);
175                let mut bytes = [0u8; 20];
176                bytes.copy_from_slice(h.as_ref());
177                ObjectHash::Sha1(bytes)
178            }
179            HashKind::Sha256 => {
180                let h = sha2::Sha256::digest(data);
181                let mut bytes = [0u8; 32];
182                bytes.copy_from_slice(h.as_ref());
183                ObjectHash::Sha256(bytes)
184            }
185        }
186    }
187    /// Create ObjectHash from object type and data
188    pub fn from_type_and_data(object_type: ObjectType, data: &[u8]) -> ObjectHash {
189        let mut d: Vec<u8> = Vec::new();
190        d.extend(object_type.to_data().unwrap());
191        d.push(b' ');
192        d.extend(data.len().to_string().as_bytes());
193        d.push(b'\x00');
194        d.extend(data);
195        ObjectHash::new(&d)
196    }
197    /// Create ObjectHash from a byte slice
198    pub fn from_bytes(bytes: &[u8]) -> Result<ObjectHash, String> {
199        let expected_len = get_hash_kind().size();
200        if bytes.len() != expected_len {
201            return Err(format!(
202                "Invalid byte length: got {}, expected {}",
203                bytes.len(),
204                expected_len
205            ));
206        }
207
208        match get_hash_kind() {
209            HashKind::Sha1 => {
210                let mut h = [0u8; 20];
211                h.copy_from_slice(bytes);
212                Ok(ObjectHash::Sha1(h))
213            }
214            HashKind::Sha256 => {
215                let mut h = [0u8; 32];
216                h.copy_from_slice(bytes);
217                Ok(ObjectHash::Sha256(h))
218            }
219        }
220    }
221    /// Create ObjectHash from a stream
222    pub fn from_stream(data: &mut impl io::Read) -> io::Result<ObjectHash> {
223        match get_hash_kind() {
224            HashKind::Sha1 => {
225                let mut h = [0u8; 20];
226                data.read_exact(&mut h)?;
227                Ok(ObjectHash::Sha1(h))
228            }
229            HashKind::Sha256 => {
230                let mut h = [0u8; 32];
231                data.read_exact(&mut h)?;
232                Ok(ObjectHash::Sha256(h))
233            }
234        }
235    }
236
237    /// Export sha1 value to String with the color
238    pub fn to_color_str(self) -> String {
239        self.to_string().red().bold().to_string()
240    }
241
242    /// Export sha1 value to a byte array
243    pub fn to_data(self) -> Vec<u8> {
244        self.as_ref().to_vec()
245    }
246
247    /// [`core::fmt::Display`] is somewhat expensive,
248    /// use this hack to get a string more efficiently
249    pub fn _to_string(&self) -> String {
250        hex::encode(self.as_ref())
251    }
252
253    /// Get mutable hash as byte slice
254    pub fn as_mut_bytes(&mut self) -> &mut [u8] {
255        match self {
256            ObjectHash::Sha1(bytes) => bytes.as_mut_slice(),
257            ObjectHash::Sha256(bytes) => bytes.as_mut_slice(),
258        }
259    }
260}
261thread_local! {
262    /// Thread-local variable to store the current hash kind.
263    /// This allows different threads to work with different hash algorithms concurrently
264    /// without interfering with each other.
265    static CURRENT_HASH_KIND: RefCell<HashKind> = RefCell::new(HashKind::default());
266}
267pub fn set_hash_kind(kind: HashKind) {
268    CURRENT_HASH_KIND.with(|h| {
269        *h.borrow_mut() = kind;
270    });
271}
272
273/// Retrieves the hash kind for the current thread.
274pub fn get_hash_kind() -> HashKind {
275    CURRENT_HASH_KIND.with(|h| *h.borrow())
276}
277/// A guard to reset the hash kind after the test
278pub struct HashKindGuard {
279    prev: HashKind,
280}
281/// Implementation of the `Drop` trait for the `HashKindGuard` struct.
282impl Drop for HashKindGuard {
283    fn drop(&mut self) {
284        set_hash_kind(self.prev);
285    }
286}
287/// Sets the hash kind for the current thread and returns a guard to reset it later.
288pub fn set_hash_kind_for_test(kind: HashKind) -> HashKindGuard {
289    let prev = get_hash_kind();
290    set_hash_kind(kind);
291    HashKindGuard { prev }
292}
#[cfg(test)]
mod tests {
    //! Unit tests for hashing, parsing and conversion of [`ObjectHash`].
    //!
    //! Every test installs the hash kind it needs through `set_hash_kind_for_test` and keeps the
    //! returned guard alive in `_guard` until the end of the test. Parse failures abort the
    //! test via `expect` instead of being printed and ignored.

    use std::io::BufReader;
    use std::io::Read;
    use std::io::Seek;
    use std::io::SeekFrom;
    use std::str::FromStr;
    use std::{env, path::PathBuf};

    use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test};

    /// Hex text of the 20-byte value used by the SHA-1 conversion tests.
    const SHA1_HEX: &str = "8ab686eafeb1f44702738c8b0f24f2567c36da6d";
    /// Hex text of the 32-byte value used by the SHA-256 conversion tests (SHA-256 of "abc").
    const SHA256_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Raw bytes corresponding to [`SHA1_HEX`].
    const SHA1_BYTES: [u8; 20] = [
        0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24, 0xf2,
        0x56, 0x7c, 0x36, 0xda, 0x6d,
    ];
    /// Raw bytes corresponding to [`SHA256_HEX`].
    const SHA256_BYTES: [u8; 32] = [
        0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22,
        0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00,
        0x15, 0xad,
    ];

    #[test]
    fn test_sha1_new() {
        // Set hash kind to SHA1 for this test
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        // Example input
        let data = "Hello, world!".as_bytes();

        // Generate SHA1 hash from the input data
        let sha1 = ObjectHash::new(data);

        // Known SHA1 hash for "Hello, world!"
        let expected_sha1_hash = "943a702d06f34599aee1f8da8ef9f7296031d699";

        assert_eq!(sha1.to_string(), expected_sha1_hash);
    }

    #[test]
    fn test_sha256_new() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let data = "Hello, world!".as_bytes();
        let sha256 = ObjectHash::new(data);
        let expected_sha256_hash =
            "315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3";
        assert_eq!(sha256.to_string(), expected_sha256_hash);
    }

    #[test]
    fn test_signature_without_delta() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The pack file ends with its own 20-byte SHA-1 checksum.
        buffered.seek(SeekFrom::End(-20)).unwrap();
        let mut buffer = vec![0; 20];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(
            signature.to_string(),
            "1d0e6c14760c956c173ede71cb28f33d921e232f"
        );
    }

    #[test]
    fn test_signature_without_delta_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/pack-78047853c60a1a3bb587f59598bdeb773fefc821f6f60f4f4797644ad43dad3d.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The pack file ends with its own 32-byte SHA-256 checksum.
        buffered.seek(SeekFrom::End(-32)).unwrap();
        let mut buffer = vec![0; 32];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(
            signature.to_string(),
            "78047853c60a1a3bb587f59598bdeb773fefc821f6f60f4f4797644ad43dad3d"
        );
    }

    #[test]
    fn test_sha1_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let sha1 = ObjectHash::from_bytes(&SHA1_BYTES).unwrap();

        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    #[test]
    fn test_sha256_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let sha256 = ObjectHash::from_bytes(&SHA256_BYTES).unwrap();

        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    #[test]
    fn test_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut reader = std::io::Cursor::new(SHA1_BYTES);
        let sha1 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    #[test]
    fn test_sha256_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut reader = std::io::Cursor::new(SHA256_BYTES);
        let sha256 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    #[test]
    fn test_sha1_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string must parse");
        assert_eq!(hash.kind(), HashKind::Sha1);
        assert_eq!(hash.to_string(), SHA1_HEX);
    }

    #[test]
    fn test_sha256_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string must parse");
        assert_eq!(hash.kind(), HashKind::Sha256);
        assert_eq!(hash.to_string(), SHA256_HEX);
    }

    #[test]
    fn test_sha1_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string must parse");
        assert_eq!(hash.to_string(), SHA1_HEX);
        // The non-Display shortcut must produce the same text.
        assert_eq!(hash._to_string(), SHA1_HEX);
    }

    #[test]
    fn test_sha256_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string must parse");
        assert_eq!(hash.to_string(), SHA256_HEX);
        // The non-Display shortcut must produce the same text.
        assert_eq!(hash._to_string(), SHA256_HEX);
    }

    #[test]
    fn test_sha1_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string must parse");
        assert_eq!(hash.to_data(), SHA1_BYTES.to_vec());
    }

    #[test]
    fn test_sha256_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string must parse");
        assert_eq!(hash.to_data(), SHA256_BYTES.to_vec());
    }
}