git_internal/
hash.rs

//! Hash utilities for Git objects with selectable algorithms (SHA-1 and SHA-256).
//! The active hash kind is stored in a thread-local, so it must be set on every thread that
//! hashes data or reads raw hash bytes, to match your repository format.
//! Each thread defaults to SHA-1.
4
5use std::{cell::RefCell, fmt::Display, hash::Hash, io, str::FromStr};
6
7use bincode::{Decode, Encode};
8use colored::Colorize;
9use serde::{Deserialize, Serialize};
10use sha1::Digest;
11
12use crate::internal::object::types::ObjectType;
13
14/// Supported hash algorithms for object IDs (selector only, no data attached).
15/// Used to configure which hash algorithm to use globally (thread-local).
16/// Defaults to SHA-1.
17#[derive(
18    Clone,
19    Copy,
20    Debug,
21    PartialEq,
22    Eq,
23    Hash,
24    PartialOrd,
25    Ord,
26    Default,
27    Deserialize,
28    Serialize,
29    Encode,
30    Decode,
31)]
32pub enum HashKind {
33    #[default]
34    Sha1,
35    Sha256,
36}
37impl HashKind {
38    /// Byte length of the hash output.
39    pub const fn size(&self) -> usize {
40        match self {
41            HashKind::Sha1 => 20,
42            HashKind::Sha256 => 32,
43            // Add more hash kinds here as needed
44        }
45    }
46    /// Hex string length of the hash output.
47    pub const fn hex_len(&self) -> usize {
48        match self {
49            HashKind::Sha1 => 40,
50            HashKind::Sha256 => 64,
51        }
52    }
53    /// Lowercase name of the hash algorithm.
54    pub const fn as_str(&self) -> &'static str {
55        match self {
56            HashKind::Sha1 => "sha1",
57            HashKind::Sha256 => "sha256",
58        }
59    }
60}
61impl std::fmt::Display for HashKind {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        f.write_str(self.as_str())
64    }
65}
66impl std::str::FromStr for HashKind {
67    type Err = String;
68
69    fn from_str(s: &str) -> Result<Self, Self::Err> {
70        match s.to_ascii_lowercase().as_str() {
71            "sha1" => Ok(HashKind::Sha1),
72            "sha256" => Ok(HashKind::Sha256),
73            _ => Err("Invalid hash kind".to_string()),
74        }
75    }
76}
77
78#[derive(
79    Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize, Encode, Decode,
80)]
81/// Concrete object ID value carrying the bytes for the selected algorithm (SHA-1 or SHA-256).
82/// Used for Git object hashes.
83/// Supports conversion to/from hex strings, byte slices, and stream reading.
84pub enum ObjectHash {
85    Sha1([u8; 20]),
86    Sha256([u8; 32]),
87}
88impl Default for ObjectHash {
89    fn default() -> Self {
90        ObjectHash::Sha1([0u8; 20])
91    }
92}
93impl Display for ObjectHash {
94    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
95        write!(f, "{}", hex::encode(self.as_ref()))
96    }
97}
98impl AsRef<[u8]> for ObjectHash {
99    fn as_ref(&self) -> &[u8] {
100        match self {
101            ObjectHash::Sha1(bytes) => bytes.as_slice(),
102            ObjectHash::Sha256(bytes) => bytes.as_slice(),
103        }
104    }
105}
106/// Parse hex (40 for SHA1, 64 for SHA-256) into `ObjectHash`.
107impl FromStr for ObjectHash {
108    type Err = String;
109
110    fn from_str(s: &str) -> Result<Self, Self::Err> {
111        match s.len() {
112            40 => {
113                let mut h = [0u8; 20];
114                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
115                h.copy_from_slice(bytes.as_slice());
116                Ok(ObjectHash::Sha1(h))
117            }
118            64 => {
119                let mut h = [0u8; 32];
120                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
121                h.copy_from_slice(bytes.as_slice());
122                Ok(ObjectHash::Sha256(h))
123            }
124            _ => Err("Invalid hash length".to_string()),
125        }
126    }
127}
128
129impl ObjectHash {
130    /// Zero-filled hex string for a given hash kind.
131    pub fn zero_str(kind: HashKind) -> String {
132        match kind {
133            HashKind::Sha1 => "0000000000000000000000000000000000000000".to_string(),
134            HashKind::Sha256 => {
135                "0000000000000000000000000000000000000000000000000000000000000000".to_string()
136            }
137        }
138    }
139
140    /// Return the hash kind for this value.
141    pub fn kind(&self) -> HashKind {
142        match self {
143            ObjectHash::Sha1(_) => HashKind::Sha1,
144            ObjectHash::Sha256(_) => HashKind::Sha256,
145        }
146    }
147    /// Return the hash size in bytes.
148    pub fn size(&self) -> usize {
149        self.kind().size()
150    }
151
152    /// Compute hash of data using current thread-local `HashKind`.
153    pub fn new(data: &[u8]) -> ObjectHash {
154        match get_hash_kind() {
155            HashKind::Sha1 => {
156                let h = sha1::Sha1::digest(data);
157                let mut bytes = [0u8; 20];
158                bytes.copy_from_slice(h.as_ref());
159                ObjectHash::Sha1(bytes)
160            }
161            HashKind::Sha256 => {
162                let h = sha2::Sha256::digest(data);
163                let mut bytes = [0u8; 32];
164                bytes.copy_from_slice(h.as_ref());
165                ObjectHash::Sha256(bytes)
166            }
167        }
168    }
169    /// Create ObjectHash from object type and data
170    pub fn from_type_and_data(object_type: ObjectType, data: &[u8]) -> ObjectHash {
171        let mut d: Vec<u8> = Vec::new();
172        d.extend(object_type.to_data().unwrap());
173        d.push(b' ');
174        d.extend(data.len().to_string().as_bytes());
175        d.push(b'\x00');
176        d.extend(data);
177        ObjectHash::new(&d)
178    }
179    /// Create `ObjectHash` from raw bytes matching the current hash size.
180    pub fn from_bytes(bytes: &[u8]) -> Result<ObjectHash, String> {
181        let expected_len = get_hash_kind().size();
182        if bytes.len() != expected_len {
183            return Err(format!(
184                "Invalid byte length: got {}, expected {}",
185                bytes.len(),
186                expected_len
187            ));
188        }
189
190        match get_hash_kind() {
191            HashKind::Sha1 => {
192                let mut h = [0u8; 20];
193                h.copy_from_slice(bytes);
194                Ok(ObjectHash::Sha1(h))
195            }
196            HashKind::Sha256 => {
197                let mut h = [0u8; 32];
198                h.copy_from_slice(bytes);
199                Ok(ObjectHash::Sha256(h))
200            }
201        }
202    }
203    /// Read hash bytes from a stream according to current hash size.
204    pub fn from_stream(data: &mut impl io::Read) -> io::Result<ObjectHash> {
205        match get_hash_kind() {
206            HashKind::Sha1 => {
207                let mut h = [0u8; 20];
208                data.read_exact(&mut h)?;
209                Ok(ObjectHash::Sha1(h))
210            }
211            HashKind::Sha256 => {
212                let mut h = [0u8; 32];
213                data.read_exact(&mut h)?;
214                Ok(ObjectHash::Sha256(h))
215            }
216        }
217    }
218
219    /// Format hash as colored string (for terminal display).
220    pub fn to_color_str(self) -> String {
221        self.to_string().red().bold().to_string()
222    }
223
224    /// Return raw bytes of the hash.
225    pub fn to_data(self) -> Vec<u8> {
226        self.as_ref().to_vec()
227    }
228
229    /// Faster string conversion than `Display`.
230    pub fn _to_string(&self) -> String {
231        hex::encode(self.as_ref())
232    }
233
234    /// Get mutable access to inner byte slice.
235    pub fn as_mut_bytes(&mut self) -> &mut [u8] {
236        match self {
237            ObjectHash::Sha1(bytes) => bytes.as_mut_slice(),
238            ObjectHash::Sha256(bytes) => bytes.as_mut_slice(),
239        }
240    }
241}
242thread_local! {
243    /// Thread-local variable to store the current hash kind.
244    /// This allows different threads to work with different hash algorithms concurrently
245    /// without interfering with each other.
246    static CURRENT_HASH_KIND: RefCell<HashKind> = RefCell::new(HashKind::default());
247}
248/// Set the thread-local hash kind (configure once at startup to match repo format).
249pub fn set_hash_kind(kind: HashKind) {
250    CURRENT_HASH_KIND.with(|h| {
251        *h.borrow_mut() = kind;
252    });
253}
254
255/// Retrieves the hash kind for the current thread.
256pub fn get_hash_kind() -> HashKind {
257    CURRENT_HASH_KIND.with(|h| *h.borrow())
258}
259/// A guard to reset the hash kind after the test
260pub struct HashKindGuard {
261    prev: HashKind,
262}
263/// Implementation of the `Drop` trait for the `HashKindGuard` struct.
264impl Drop for HashKindGuard {
265    fn drop(&mut self) {
266        set_hash_kind(self.prev);
267    }
268}
269/// Sets the hash kind for the current thread and returns a guard to reset it later.
270pub fn set_hash_kind_for_test(kind: HashKind) -> HashKindGuard {
271    let prev = get_hash_kind();
272    set_hash_kind(kind);
273    HashKindGuard { prev }
274}
#[cfg(test)]
mod tests {

    use std::{
        env,
        io::{BufReader, Read, Seek, SeekFrom},
        path::PathBuf,
        str::FromStr,
    };

    use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test};

    /// Hex form of the SHA-1 fixture shared by the tests below.
    const SHA1_HEX: &str = "8ab686eafeb1f44702738c8b0f24f2567c36da6d";

    /// Raw bytes of [`SHA1_HEX`].
    const SHA1_BYTES: [u8; 20] = [
        0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24,
        0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d,
    ];

    /// Hex form of the SHA-256 fixture (pre-calculated SHA-256 of "abc").
    const SHA256_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Raw bytes of [`SHA256_HEX`].
    const SHA256_BYTES: [u8; 32] = [
        0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
        0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
        0xf2, 0x00, 0x15, 0xad,
    ];

    /// Hashing "Hello, world!" with SHA1 should match known value.
    #[test]
    fn test_sha1_new() {
        // Set hash kind to SHA1 for this test
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let data = "Hello, world!".as_bytes();

        let sha1 = ObjectHash::new(data);

        // Known SHA1 hash for "Hello, world!"
        let expected_sha1_hash = "943a702d06f34599aee1f8da8ef9f7296031d699";
        assert_eq!(sha1.to_string(), expected_sha1_hash);
    }

    /// Hashing "Hello, world!" with SHA256 should match known value.
    #[test]
    fn test_sha256_new() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let data = "Hello, world!".as_bytes();
        let sha256 = ObjectHash::new(data);
        let expected_sha256_hash =
            "315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3";
        assert_eq!(sha256.to_string(), expected_sha256_hash);
    }

    /// Read pack trailer for SHA1 pack should yield SHA1 hash.
    #[test]
    fn test_signature_without_delta() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/small-sha1.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The trailer is the last 20 bytes of a SHA-1 pack.
        buffered.seek(SeekFrom::End(-20)).unwrap();
        let mut buffer = vec![0; 20];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(signature.kind(), HashKind::Sha1);
    }

    /// Read pack trailer for SHA256 pack should yield SHA256 hash.
    #[test]
    fn test_signature_without_delta_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/small-sha256.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The trailer is the last 32 bytes of a SHA-256 pack.
        buffered.seek(SeekFrom::End(-32)).unwrap();
        let mut buffer = vec![0; 32];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(signature.kind(), HashKind::Sha256);
    }

    /// Construct SHA1 from raw bytes.
    #[test]
    fn test_sha1_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let sha1 = ObjectHash::from_bytes(&SHA1_BYTES).unwrap();

        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    /// Construct SHA256 from raw bytes.
    #[test]
    fn test_sha256_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let sha256 = ObjectHash::from_bytes(&SHA256_BYTES).unwrap();

        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    /// Raw bytes whose length does not match the active hash kind must be rejected.
    #[test]
    fn test_from_bytes_rejects_wrong_length() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        assert!(ObjectHash::from_bytes(&SHA256_BYTES).is_err());
        assert!(ObjectHash::from_bytes(&[]).is_err());
    }

    /// Read hash from stream for SHA1.
    #[test]
    fn test_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut reader = std::io::Cursor::new(SHA1_BYTES);
        let sha1 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    /// Read hash from stream for SHA256.
    #[test]
    fn test_sha256_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut reader = std::io::Cursor::new(SHA256_BYTES);
        let sha256 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    /// Parse SHA1 from hex string.
    ///
    /// A parse failure fails the test instead of being printed and ignored.
    #[test]
    fn test_sha1_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-digit hex string must parse");
        assert_eq!(hash.kind(), HashKind::Sha1);
        assert_eq!(hash.to_string(), SHA1_HEX);
    }

    /// Parse SHA256 from hex string.
    #[test]
    fn test_sha256_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-digit hex string must parse");
        assert_eq!(hash.kind(), HashKind::Sha256);
        assert_eq!(hash.to_string(), SHA256_HEX);
    }

    /// Malformed hex input must be rejected.
    #[test]
    fn test_from_str_rejects_invalid_input() {
        // Unsupported length.
        assert_eq!(
            ObjectHash::from_str("abc").unwrap_err(),
            "Invalid hash length"
        );
        // Supported length, but not hexadecimal.
        assert!(ObjectHash::from_str(&"z".repeat(40)).is_err());
        assert!(ObjectHash::from_str(&"z".repeat(64)).is_err());
    }

    /// SHA1 to_string should round-trip.
    #[test]
    fn test_sha1_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-digit hex string must parse");
        assert_eq!(hash.to_string(), SHA1_HEX);
        assert_eq!(hash._to_string(), SHA1_HEX);
    }

    /// SHA256 to_string should round-trip.
    #[test]
    fn test_sha256_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-digit hex string must parse");
        assert_eq!(hash.to_string(), SHA256_HEX);
        assert_eq!(hash._to_string(), SHA256_HEX);
    }

    /// SHA1 to_data should produce expected bytes.
    #[test]
    fn test_sha1_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-digit hex string must parse");
        assert_eq!(hash.to_data(), SHA1_BYTES.to_vec());
    }

    /// SHA256 to_data should produce expected bytes.
    #[test]
    fn test_sha256_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-digit hex string must parse");
        assert_eq!(hash.to_data(), SHA256_BYTES.to_vec());
    }
}