Skip to main content

git_internal/
hash.rs

//! Hash utilities for Git objects with selectable algorithms (SHA-1 and SHA-256).
//! The active hash kind is stored thread-locally: every thread starts at the default (SHA-1),
//! so set it once at startup on each thread that must match your repository format.
4
5use std::{cell::RefCell, fmt::Display, hash::Hash, io, str::FromStr};
6
7use colored::Colorize;
8use sha1::Digest;
9
10use crate::internal::object::types::ObjectType;
11
12/// Supported hash algorithms for object IDs (selector only, no data attached).
13/// Used to configure which hash algorithm to use globally (thread-local).
14/// Defaults to SHA-1.
15#[derive(
16    Clone,
17    Copy,
18    Debug,
19    PartialEq,
20    Eq,
21    Hash,
22    PartialOrd,
23    Ord,
24    Default,
25    serde::Deserialize,
26    serde::Serialize,
27    rkyv::Archive,
28    rkyv::Serialize,
29    rkyv::Deserialize,
30)]
31pub enum HashKind {
32    #[default]
33    Sha1,
34    Sha256,
35}
36impl HashKind {
37    /// Byte length of the hash output.
38    pub const fn size(&self) -> usize {
39        match self {
40            HashKind::Sha1 => 20,
41            HashKind::Sha256 => 32,
42            // Add more hash kinds here as needed
43        }
44    }
45    /// Hex string length of the hash output.
46    pub const fn hex_len(&self) -> usize {
47        match self {
48            HashKind::Sha1 => 40,
49            HashKind::Sha256 => 64,
50        }
51    }
52    /// Lowercase name of the hash algorithm.
53    pub const fn as_str(&self) -> &'static str {
54        match self {
55            HashKind::Sha1 => "sha1",
56            HashKind::Sha256 => "sha256",
57        }
58    }
59}
60impl std::fmt::Display for HashKind {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        f.write_str(self.as_str())
63    }
64}
65impl std::str::FromStr for HashKind {
66    type Err = String;
67
68    fn from_str(s: &str) -> Result<Self, Self::Err> {
69        match s.to_ascii_lowercase().as_str() {
70            "sha1" => Ok(HashKind::Sha1),
71            "sha256" => Ok(HashKind::Sha256),
72            _ => Err("Invalid hash kind".to_string()),
73        }
74    }
75}
76
77#[derive(
78    Clone,
79    Copy,
80    Debug,
81    PartialEq,
82    Eq,
83    Hash,
84    PartialOrd,
85    Ord,
86    serde::Deserialize,
87    serde::Serialize,
88    rkyv::Archive,
89    rkyv::Serialize,
90    rkyv::Deserialize,
91)]
92/// Concrete object ID value carrying the bytes for the selected algorithm (SHA-1 or SHA-256).
93/// Used for Git object hashes.
94/// Supports conversion to/from hex strings, byte slices, and stream reading.
95pub enum ObjectHash {
96    Sha1([u8; 20]),
97    Sha256([u8; 32]),
98}
99impl Default for ObjectHash {
100    fn default() -> Self {
101        ObjectHash::Sha1([0u8; 20])
102    }
103}
104impl Display for ObjectHash {
105    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
106        write!(f, "{}", hex::encode(self.as_ref()))
107    }
108}
109impl AsRef<[u8]> for ObjectHash {
110    fn as_ref(&self) -> &[u8] {
111        match self {
112            ObjectHash::Sha1(bytes) => bytes.as_slice(),
113            ObjectHash::Sha256(bytes) => bytes.as_slice(),
114        }
115    }
116}
117/// Parse hex (40 for SHA1, 64 for SHA-256) into `ObjectHash`.
118impl FromStr for ObjectHash {
119    type Err = String;
120
121    fn from_str(s: &str) -> Result<Self, Self::Err> {
122        match s.len() {
123            40 => {
124                let mut h = [0u8; 20];
125                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
126                h.copy_from_slice(bytes.as_slice());
127                Ok(ObjectHash::Sha1(h))
128            }
129            64 => {
130                let mut h = [0u8; 32];
131                let bytes = hex::decode(s).map_err(|e| e.to_string())?;
132                h.copy_from_slice(bytes.as_slice());
133                Ok(ObjectHash::Sha256(h))
134            }
135            _ => Err("Invalid hash length".to_string()),
136        }
137    }
138}
139
140impl ObjectHash {
141    /// Zero-filled hex string for a given hash kind.
142    pub fn zero_str(kind: HashKind) -> String {
143        match kind {
144            HashKind::Sha1 => "0000000000000000000000000000000000000000".to_string(),
145            HashKind::Sha256 => {
146                "0000000000000000000000000000000000000000000000000000000000000000".to_string()
147            }
148        }
149    }
150
151    /// Return the hash kind for this value.
152    pub fn kind(&self) -> HashKind {
153        match self {
154            ObjectHash::Sha1(_) => HashKind::Sha1,
155            ObjectHash::Sha256(_) => HashKind::Sha256,
156        }
157    }
158    /// Return the hash size in bytes.
159    pub fn size(&self) -> usize {
160        self.kind().size()
161    }
162
163    /// Compute hash of data using current thread-local `HashKind`.
164    pub fn new(data: &[u8]) -> ObjectHash {
165        match get_hash_kind() {
166            HashKind::Sha1 => {
167                let h = sha1::Sha1::digest(data);
168                let mut bytes = [0u8; 20];
169                bytes.copy_from_slice(h.as_ref());
170                ObjectHash::Sha1(bytes)
171            }
172            HashKind::Sha256 => {
173                let h = sha2::Sha256::digest(data);
174                let mut bytes = [0u8; 32];
175                bytes.copy_from_slice(h.as_ref());
176                ObjectHash::Sha256(bytes)
177            }
178        }
179    }
180    /// Create ObjectHash from object type and data
181    pub fn from_type_and_data(object_type: ObjectType, data: &[u8]) -> ObjectHash {
182        let mut d: Vec<u8> = Vec::new();
183        d.extend(object_type.to_data().unwrap());
184        d.push(b' ');
185        d.extend(data.len().to_string().as_bytes());
186        d.push(b'\x00');
187        d.extend(data);
188        ObjectHash::new(&d)
189    }
190    /// Create `ObjectHash` from raw bytes matching the current hash size.
191    pub fn from_bytes(bytes: &[u8]) -> Result<ObjectHash, String> {
192        let expected_len = get_hash_kind().size();
193        if bytes.len() != expected_len {
194            return Err(format!(
195                "Invalid byte length: got {}, expected {}",
196                bytes.len(),
197                expected_len
198            ));
199        }
200
201        match get_hash_kind() {
202            HashKind::Sha1 => {
203                let mut h = [0u8; 20];
204                h.copy_from_slice(bytes);
205                Ok(ObjectHash::Sha1(h))
206            }
207            HashKind::Sha256 => {
208                let mut h = [0u8; 32];
209                h.copy_from_slice(bytes);
210                Ok(ObjectHash::Sha256(h))
211            }
212        }
213    }
214    /// Read hash bytes from a stream according to current hash size.
215    pub fn from_stream(data: &mut impl io::Read) -> io::Result<ObjectHash> {
216        match get_hash_kind() {
217            HashKind::Sha1 => {
218                let mut h = [0u8; 20];
219                data.read_exact(&mut h)?;
220                Ok(ObjectHash::Sha1(h))
221            }
222            HashKind::Sha256 => {
223                let mut h = [0u8; 32];
224                data.read_exact(&mut h)?;
225                Ok(ObjectHash::Sha256(h))
226            }
227        }
228    }
229
230    /// Format hash as colored string (for terminal display).
231    pub fn to_color_str(self) -> String {
232        self.to_string().red().bold().to_string()
233    }
234
235    /// Return raw bytes of the hash.
236    pub fn to_data(self) -> Vec<u8> {
237        self.as_ref().to_vec()
238    }
239
240    /// Faster string conversion than `Display`.
241    pub fn _to_string(&self) -> String {
242        hex::encode(self.as_ref())
243    }
244
245    /// Get mutable access to inner byte slice.
246    pub fn as_mut_bytes(&mut self) -> &mut [u8] {
247        match self {
248            ObjectHash::Sha1(bytes) => bytes.as_mut_slice(),
249            ObjectHash::Sha256(bytes) => bytes.as_mut_slice(),
250        }
251    }
252}
253
254thread_local! {
255    /// Thread-local variable to store the current hash kind.
256    /// This allows different threads to work with different hash algorithms concurrently
257    /// without interfering with each other.
258    static CURRENT_HASH_KIND: RefCell<HashKind> = RefCell::new(HashKind::default());
259}
260/// Set the thread-local hash kind (configure once at startup to match repo format).
261pub fn set_hash_kind(kind: HashKind) {
262    CURRENT_HASH_KIND.with(|h| {
263        *h.borrow_mut() = kind;
264    });
265}
266
267/// Retrieves the hash kind for the current thread.
268pub fn get_hash_kind() -> HashKind {
269    CURRENT_HASH_KIND.with(|h| *h.borrow())
270}
271/// A guard to reset the hash kind after the test
272pub struct HashKindGuard {
273    prev: HashKind,
274}
275/// Implementation of the `Drop` trait for the `HashKindGuard` struct.
276impl Drop for HashKindGuard {
277    fn drop(&mut self) {
278        set_hash_kind(self.prev);
279    }
280}
281/// Sets the hash kind for the current thread and returns a guard to reset it later.
282pub fn set_hash_kind_for_test(kind: HashKind) -> HashKindGuard {
283    let prev = get_hash_kind();
284    set_hash_kind(kind);
285    HashKindGuard { prev }
286}
#[cfg(test)]
mod tests {
    //! Unit tests for `HashKind` / `ObjectHash`.
    //!
    //! Every test pins the thread-local hash kind with `set_hash_kind_for_test`
    //! and keeps the returned guard alive, so the previous kind is restored
    //! when the test ends. Parse results are unwrapped with `expect` so that a
    //! failed parse fails the test instead of being printed and ignored.

    use std::{
        env,
        io::{BufReader, Read, Seek, SeekFrom},
        path::PathBuf,
        str::FromStr,
    };

    use crate::hash::{HashKind, ObjectHash, set_hash_kind_for_test};

    /// Hex form of the 20-byte SHA-1 fixture used by several tests.
    const SHA1_HEX: &str = "8ab686eafeb1f44702738c8b0f24f2567c36da6d";
    /// Hex form of the 32-byte SHA-256 fixture used by several tests.
    const SHA256_HEX: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Hashing "Hello, world!" with SHA1 should match known value.
    #[test]
    fn test_sha1_new() {
        // Set hash kind to SHA1 for this test
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        // Example input
        let data = "Hello, world!".as_bytes();

        // Generate SHA1 hash from the input data
        let sha1 = ObjectHash::new(data);

        // Known SHA1 hash for "Hello, world!"
        let expected_sha1_hash = "943a702d06f34599aee1f8da8ef9f7296031d699";

        assert_eq!(sha1.to_string(), expected_sha1_hash);
    }

    /// Hashing "Hello, world!" with SHA256 should match known value.
    #[test]
    fn test_sha256_new() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let data = "Hello, world!".as_bytes();
        let sha256 = ObjectHash::new(data);
        let expected_sha256_hash =
            "315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3";
        assert_eq!(sha256.to_string(), expected_sha256_hash);
    }

    /// Read pack trailer for SHA1 pack should yield SHA1 hash.
    #[test]
    fn test_signature_without_delta() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/small-sha1.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The last 20 bytes of the file are read as the trailer hash.
        buffered.seek(SeekFrom::End(-20)).unwrap();
        let mut buffer = vec![0; 20];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(signature.kind(), HashKind::Sha1);
    }

    /// Read pack trailer for SHA256 pack should yield SHA256 hash.
    #[test]
    fn test_signature_without_delta_sha256() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/small-sha256.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        // The last 32 bytes of the file are read as the trailer hash.
        buffered.seek(SeekFrom::End(-32)).unwrap();
        let mut buffer = vec![0; 32];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = ObjectHash::from_bytes(buffer.as_ref()).unwrap();
        assert_eq!(signature.kind(), HashKind::Sha256);
    }

    /// Construct SHA1 from raw bytes.
    #[test]
    fn test_sha1_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let sha1 = ObjectHash::from_bytes(&[
            0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24,
            0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d,
        ])
        .unwrap();

        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    /// Construct SHA256 from raw bytes.
    #[test]
    fn test_sha256_from_bytes() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        // Pre-calculated SHA256 hash for "abc"
        let sha256 = ObjectHash::from_bytes(&[
            0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
            0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
            0xf2, 0x00, 0x15, 0xad,
        ])
        .unwrap();

        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    /// Read hash from stream for SHA1.
    #[test]
    fn test_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);
        let source = [
            0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24,
            0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d,
        ];
        let mut reader = std::io::Cursor::new(source);
        let sha1 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha1.to_string(), SHA1_HEX);
    }

    /// Read hash from stream for SHA256.
    #[test]
    fn test_sha256_from_stream() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);
        let source = [
            0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
            0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
            0xf2, 0x00, 0x15, 0xad,
        ];
        let mut reader = std::io::Cursor::new(source);
        let sha256 = ObjectHash::from_stream(&mut reader).unwrap();
        assert_eq!(sha256.to_string(), SHA256_HEX);
    }

    /// Parse SHA1 from hex string.
    #[test]
    fn test_sha1_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string should parse");
        assert_eq!(hash.kind(), HashKind::Sha1);
        assert_eq!(hash.to_string(), SHA1_HEX);
    }

    /// Parse SHA256 from hex string.
    #[test]
    fn test_sha256_from_str() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string should parse");
        assert_eq!(hash.kind(), HashKind::Sha256);
        assert_eq!(hash.to_string(), SHA256_HEX);
    }

    /// Malformed hex input must be rejected with an error.
    #[test]
    fn test_from_str_rejects_invalid_input() {
        // Neither 40 nor 64 characters long.
        assert!(ObjectHash::from_str("").is_err());
        assert!(ObjectHash::from_str("8ab686ea").is_err());
        // Accepted length, but not hexadecimal.
        assert!(ObjectHash::from_str(&"z".repeat(40)).is_err());
        assert!(ObjectHash::from_str(&"z".repeat(64)).is_err());
    }

    /// SHA1 to_string should round-trip.
    #[test]
    fn test_sha1_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string should parse");
        assert_eq!(hash.to_string(), SHA1_HEX);
    }

    /// SHA256 to_string should round-trip.
    #[test]
    fn test_sha256_to_string() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string should parse");
        assert_eq!(hash.to_string(), SHA256_HEX);
    }

    /// SHA1 to_data should produce expected bytes.
    #[test]
    fn test_sha1_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha1);

        let hash = ObjectHash::from_str(SHA1_HEX).expect("40-char hex string should parse");
        assert_eq!(
            hash.to_data(),
            vec![
                0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f,
                0x24, 0xf2, 0x56, 0x7c, 0x36, 0xda, 0x6d
            ]
        );
    }

    /// SHA256 to_data should produce expected bytes.
    #[test]
    fn test_sha256_to_data() {
        let _guard = set_hash_kind_for_test(HashKind::Sha256);

        let hash = ObjectHash::from_str(SHA256_HEX).expect("64-char hex string should parse");
        assert_eq!(
            hash.to_data(),
            vec![
                0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d,
                0xae, 0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10,
                0xff, 0x61, 0xf2, 0x00, 0x15, 0xad,
            ]
        );
    }
}