Skip to main content

git_internal/
hash.rs

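//! Hash utilities for Git objects with selectable algorithms (SHA-1 and SHA-256).
//! The active hash kind is stored in a thread-local, so it applies only to the thread that set it:
//! set it on every thread that hashes data or reads raw object IDs, to match your repository format.
//! Each thread starts with the default, SHA-1.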
4
5use std::{cell::RefCell, fmt::Display, hash::Hash, io, str::FromStr};
6
7use bincode::{Decode, Encode};
8use colored::Colorize;
9use serde::{Deserialize, Serialize};
10use sha1::Digest;
11
12use crate::internal::object::types::ObjectType;
13
14/// Supported hash algorithms for object IDs (selector only, no data attached).
15/// Used to configure which hash algorithm to use globally (thread-local).
16/// Defaults to SHA-1.
17#[derive(
18    Clone,
19    Copy,
20    Debug,
21    PartialEq,
22    Eq,
23    Hash,
24    PartialOrd,
25    Ord,
26    Default,
27    Deserialize,
28    Serialize,
29    Encode,
30    Decode,
31)]
32pub enum HashKind {
33    #[default]
34    Sha1,
35    Sha256,
36}
37impl HashKind {
38    /// Byte length of the hash output.
39    pub const fn size(&self) -> usize {
40        match self {
41            HashKind::Sha1 => 20,
42            HashKind::Sha256 => 32,
43            // Add more hash kinds here as needed
44        }
45    }
46    /// Hex string length of the hash output.
47    pub const fn hex_len(&self) -> usize {
48        match self {
49            HashKind::Sha1 => 40,
50            HashKind::Sha256 => 64,
51        }
52    }
53    /// Lowercase name of the hash algorithm.
54    pub const fn as_str(&self) -> &'static str {
55        match self {
56            HashKind::Sha1 => "sha1",
57            HashKind::Sha256 => "sha256",
58        }
59    }
60}
61impl std::fmt::Display for HashKind {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        f.write_str(self.as_str())
64    }
65}
66impl std::str::FromStr for HashKind {
67    type Err = String;
68
69    fn from_str(s: &str) -> Result<Self, Self::Err> {
70        match s.to_ascii_lowercase().as_str() {
71            "sha1" => Ok(HashKind::Sha1),
72            "sha256" => Ok(HashKind::Sha256),
73            _ => Err("Invalid hash kind".to_string()),
74        }
75    }
76}
77
78#[derive(
79    Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize, Encode, Decode,
80)]
81/// Concrete object ID value carrying the bytes for the selected algorithm (SHA-1 or SHA-256).
82/// Used for Git object hashes.
83/// Supports conversion to/from hex strings, byte slices, and stream reading.
84pub enum ObjectHash {
85    Sha1([u8; 20]),
86    Sha256([u8; 32]),
87}
88impl Default for ObjectHash {
89    fn default() -> Self {
90        ObjectHash::Sha1([0u8; 20])
91    }
92}
93impl Display for ObjectHash {
94    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
95        write!(f, "{}", hex::encode(self.as_ref()))
96    }
97}
98impl AsRef<[u8]> for ObjectHash {
99    fn as_ref(&self) -> &[u8] {
100        match self {
101            ObjectHash::Sha1(bytes) => bytes.as_slice(),
102            ObjectHash::Sha256(bytes) => bytes.as_slice(),
103        }
104    }
105}
106/// Parse hex (40 for SHA1, 64 for SHA-256) into `ObjectHash`.
107impl FromStr for ObjectHash {
108    type Err = String;
109
110    fn from_str(s: &str) -> Result<Self, Self::Err> {
111        match s.len() {
112            40 => {
113                let mut h = [0u8; 20];
114                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
115                h.copy_from_slice(bytes.as_slice());
116                Ok(ObjectHash::Sha1(h))
117            }
118            64 => {
119                let mut h = [0u8; 32];
120                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
121                h.copy_from_slice(bytes.as_slice());
122                Ok(ObjectHash::Sha256(h))
123            }
124            _ => Err("Invalid hash length".to_string()),
125        }
126    }
127}
128
129impl ObjectHash {
130    /// Zero-filled hex string for a given hash kind.
131    pub fn zero_str(kind: HashKind) -> String {
132        match kind {
133            HashKind::Sha1 => "0000000000000000000000000000000000000000".to_string(),
134            HashKind::Sha256 => {
135                "0000000000000000000000000000000000000000000000000000000000000000".to_string()
136            }
137        }
138    }
139
140    /// Return the hash kind for this value.
141    pub fn kind(&self) -> HashKind {
142        match self {
143            ObjectHash::Sha1(_) => HashKind::Sha1,
144            ObjectHash::Sha256(_) => HashKind::Sha256,
145        }
146    }
147    /// Return the hash size in bytes.
148    pub fn size(&self) -> usize {
149        self.kind().size()
150    }
151
152    /// Compute hash of data using current thread-local `HashKind`.
153    pub fn new(data: &[u8]) -> ObjectHash {
154        match get_hash_kind() {
155            HashKind::Sha1 => {
156                let h = sha1::Sha1::digest(data);
157                let mut bytes = [0u8; 20];
158                bytes.copy_from_slice(h.as_ref());
159                ObjectHash::Sha1(bytes)
160            }
161            HashKind::Sha256 => {
162                let h = sha2::Sha256::digest(data);
163                let mut bytes = [0u8; 32];
164                bytes.copy_from_slice(h.as_ref());
165                ObjectHash::Sha256(bytes)
166            }
167        }
168    }
169    /// Create ObjectHash from object type and data
170    pub fn from_type_and_data(object_type: ObjectType, data: &[u8]) -> ObjectHash {
171        let mut d: Vec<u8> = Vec::new();
172        d.extend(object_type.to_data().unwrap());
173        d.push(b' ');
174        d.extend(data.len().to_string().as_bytes());
175        d.push(b'\x00');
176        d.extend(data);
177        ObjectHash::new(&d)
178    }
179    /// Create `ObjectHash` from raw bytes matching the current hash size.
180    pub fn from_bytes(bytes: &[u8]) -> Result<ObjectHash, String> {
181        let expected_len = get_hash_kind().size();
182        if bytes.len() != expected_len {
183            return Err(format!(
184                "Invalid byte length: got {}, expected {}",
185                bytes.len(),
186                expected_len
187            ));
188        }
189
190        match get_hash_kind() {
191            HashKind::Sha1 => {
192                let mut h = [0u8; 20];
193                h.copy_from_slice(bytes);
194                Ok(ObjectHash::Sha1(h))
195            }
196            HashKind::Sha256 => {
197                let mut h = [0u8; 32];
198                h.copy_from_slice(bytes);
199                Ok(ObjectHash::Sha256(h))
200            }
201        }
202    }
203    /// Read hash bytes from a stream according to current hash size.
204    pub fn from_stream(data: &mut impl io::Read) -> io::Result<ObjectHash> {
205        match get_hash_kind() {
206            HashKind::Sha1 => {
207                let mut h = [0u8; 20];
208                data.read_exact(&mut h)?;
209                Ok(ObjectHash::Sha1(h))
210            }
211            HashKind::Sha256 => {
212                let mut h = [0u8; 32];
213                data.read_exact(&mut h)?;
214                Ok(ObjectHash::Sha256(h))
215            }
216        }
217    }
218
219    /// Format hash as colored string (for terminal display).
220    pub fn to_color_str(self) -> String {
221        self.to_string().red().bold().to_string()
222    }
223
224    /// Return raw bytes of the hash.
225    pub fn to_data(self) -> Vec<u8> {
226        self.as_ref().to_vec()
227    }
228
229    /// Faster string conversion than `Display`.
230    pub fn _to_string(&self) -> String {
231        hex::encode(self.as_ref())
232    }
233
234    /// Get mutable access to inner byte slice.
235    pub fn as_mut_bytes(&mut self) -> &mut [u8] {
236        match self {
237            ObjectHash::Sha1(bytes) => bytes.as_mut_slice(),
238            ObjectHash::Sha256(bytes) => bytes.as_mut_slice(),
239        }
240    }
241}
242
243thread_local! {
244    /// Thread-local variable to store the current hash kind.
245    /// This allows different threads to work with different hash algorithms concurrently
246    /// without interfering with each other.
247    static CURRENT_HASH_KIND: RefCell<HashKind> = RefCell::new(HashKind::default());
248}
249/// Set the thread-local hash kind (configure once at startup to match repo format).
250pub fn set_hash_kind(kind: HashKind) {
251    CURRENT_HASH_KIND.with(|h| {
252        *h.borrow_mut() = kind;
253    });
254}
255
256/// Retrieves the hash kind for the current thread.
257pub fn get_hash_kind() -> HashKind {
258    CURRENT_HASH_KIND.with(|h| *h.borrow())
259}
260/// A guard to reset the hash kind after the test
261pub struct HashKindGuard {
262    prev: HashKind,
263}
264/// Implementation of the `Drop` trait for the `HashKindGuard` struct.
265impl Drop for HashKindGuard {
266    fn drop(&mut self) {
267        set_hash_kind(self.prev);
268    }
269}
270/// Sets the hash kind for the current thread and returns a guard to reset it later.
271pub fn set_hash_kind_for_test(kind: HashKind) -> HashKindGuard {
272    let prev = get_hash_kind();
273    set_hash_kind(kind);
274    HashKindGuard { prev }
275}
#[cfg(test)]
mod tests {
    //! Unit tests for hashing, parsing and formatting of `ObjectHash`.
    //!
    //! Each test pins the thread-local hash kind with `set_hash_kind_for_test` and keeps the
    //! returned guard alive for the whole test. Parsing results are unwrapped with `expect`
    //! so that a parse failure fails the test instead of being printed and ignored.

    use std::{
        env,
        io::{BufReader, Read, Seek, SeekFrom},
        path::PathBuf,
        str::FromStr,
    };

    use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test};

    /// Hex form of the SHA-1 value used by the parsing/formatting tests.
    const SHA1_HEX: &str = "8ab686eafeb1f44702738c8b0f24f2567c36da6d";
    /// Hex form of the SHA-256 value used by the parsing/formatting tests.
    const SHA256_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Hashing "Hello, world!" with SHA1 should match known value.
    #[test]
    fn test_sha1_new() {
        // Set hash kind to SHA1 for this test
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        // Example input
        let data = "Hello, world!".as_bytes();

        // Generate SHA1 hash from the input data
        let sha1 = ObjectHash::new(data);

        // Known SHA1 hash for "Hello, world!"
        let expected_sha1_hash = "943a702d06f34599aee1f8da8ef9f7296031d699";

        assert_eq!(sha1.to_string(), expected_sha1_hash);
    }

    /// Hashing "Hello, world!" with SHA256 should match known value.
    #[test]
    fn test_sha256_new() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let data = "Hello, world!".as_bytes();
        let sha256 = ObjectHash::new(data);
        let expected_sha256_hash =
            "315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3";
        assert_eq!(sha256.to_string(), expected_sha256_hash);
    }

    /// Read pack trailer for SHA1 pack should yield SHA1 hash.
    #[test]
    fn test_signature_without_delta() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/small-sha1.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The trailer is the last 20 bytes of the pack file.
        buffered.seek(SeekFrom::End(-20)).unwrap();
        let mut buffer = vec![0; 20];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(signature.kind(), HashKind::Sha1);
    }

    /// Read pack trailer for SHA256 pack should yield SHA256 hash.
    #[test]
    fn test_signature_without_delta_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/small-sha256.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The trailer is the last 32 bytes of the pack file.
        buffered.seek(SeekFrom::End(-32)).unwrap();
        let mut buffer = vec![0; 32];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(signature.kind(), HashKind::Sha256);
    }

    /// Construct SHA1 from raw bytes.
    #[test]
    fn test_sha1_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let sha1 = ObjectHash::from_bytes(&[
            0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24,
            0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d,
        ])
        .unwrap();

        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    /// Construct SHA256 from raw bytes.
    #[test]
    fn test_sha256_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        // Pre-calculated SHA256 hash for "abc"
        let sha256 = ObjectHash::from_bytes(&[
            0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
            0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
            0xf2, 0x00, 0x15, 0xad,
        ])
        .unwrap();

        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    /// Read hash from stream for SHA1.
    #[test]
    fn test_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let source = [
            0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24,
            0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d,
        ];
        let mut reader = std::io::Cursor::new(source);
        let sha1 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    /// Read hash from stream for SHA256.
    #[test]
    fn test_sha256_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let source = [
            0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
            0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
            0xf2, 0x00, 0x15, 0xad,
        ];
        let mut reader = std::io::Cursor::new(source);
        let sha256 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    /// Parse SHA1 from hex string.
    #[test]
    fn test_sha1_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string should parse");
        assert_eq!(hash.kind(), HashKind::Sha1);
        assert_eq!(hash.to_string(), SHA1_HEX);
    }

    /// Parse SHA256 from hex string.
    #[test]
    fn test_sha256_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string should parse");
        assert_eq!(hash.kind(), HashKind::Sha256);
        assert_eq!(hash.to_string(), SHA256_HEX);
    }

    /// SHA1 to_string should round-trip.
    #[test]
    fn test_sha1_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string should parse");
        assert_eq!(hash.to_string(), SHA1_HEX);
        assert_eq!(hash._to_string(), SHA1_HEX);
    }

    /// SHA256 to_string should round-trip.
    #[test]
    fn test_sha256_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string should parse");
        assert_eq!(hash.to_string(), SHA256_HEX);
        assert_eq!(hash._to_string(), SHA256_HEX);
    }

    /// SHA1 to_data should produce expected bytes.
    #[test]
    fn test_sha1_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string should parse");
        assert_eq!(
            hash.to_data(),
            vec![
                0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f,
                0x24, 0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d
            ]
        );
    }

    /// SHA256 to_data should produce expected bytes.
    #[test]
    fn test_sha256_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string should parse");
        assert_eq!(
            hash.to_data(),
            vec![
                0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d,
                0xae, 0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10,
                0xff, 0x61, 0xf2, 0x00, 0x15, 0xad,
            ]
        );
    }
}