git_internal/hash.rs
//! Git uses the SHA-1 hash algorithm to generate unique identifiers for its objects.
//! Each Git object corresponds to a unique SHA-1 hash value, and that value is used to locate
//! the object both in Git's internal storage and in the mega database.
5
6use std::{fmt::Display, io};
7
8use bincode::{Decode, Encode};
9use colored::Colorize;
10use serde::{Deserialize, Serialize};
11use sha1::Digest;
12
13use crate::internal::object::types::ObjectType;
14
15/// The [`SHA1`] struct, encapsulating a `[u8; 20]` array, is specifically designed to represent Git hash IDs.
16/// In Git's context, these IDs are 40-character hexadecimal strings generated via the SHA-1 algorithm.
17/// Each Git object receives a unique hash ID based on its content, serving as an identifier for its location
18/// within the Git internal database. Utilizing a dedicated struct for these hash IDs enhances code readability and
19/// maintainability by providing a clear, structured format for their manipulation and storage.
20///
21/// ### Change Log
22///
23/// In previous versions of the 'mega' project, `Hash` was used to denote hash values. However, in newer versions,
24/// `SHA1` is employed for this purpose. Future updates plan to extend support to SHA256 and SHA512, or potentially
25/// other hash algorithms. By abstracting the hash model to `Hash`, and using specific imports like `use crate::hash::SHA1`
26/// or `use crate::hash::SHA256`, the codebase maintains a high level of clarity and maintainability. This design choice
27/// allows for easier adaptation to different hash algorithms while keeping the underlying implementation consistent and
28/// understandable. - Nov 26, 2023 (by @genedna)
29///
30#[derive(
31 Clone,
32 Copy,
33 Debug,
34 PartialEq,
35 Eq,
36 Hash,
37 PartialOrd,
38 Ord,
39 Default,
40 Deserialize,
41 Serialize,
42 Encode,
43 Decode,
44)]
45pub struct SHA1(pub [u8; 20]);
46
47/// Display trait for SHA1.
48impl Display for SHA1 {
49 /// Allows [`SHA1::to_string()`] to be used.
50 /// Note: If you want a terminal-friendly colorized output, use [`SHA1::to_color_str()`].
51 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
52 write!(f, "{}", hex::encode(self.0))
53 }
54}
55
56impl AsRef<[u8]> for SHA1 {
57 fn as_ref(&self) -> &[u8] {
58 &self.0
59 }
60}
61/// Implementation of the [`std::str::FromStr`] trait for the [`SHA1`] type.
62///
63/// To effectively use the `from_str` method for converting a string to a `SHA1` object, consider the following:
64/// 1. The input string `s` should be a pre-calculated hexadecimal string, exactly 40 characters in length. This string
65/// represents a SHA1 hash and should conform to the standard SHA1 hash format.
66/// 2. It is necessary to explicitly import the `FromStr` trait to utilize the `from_str` method. Include the import
67/// statement `use std::str::FromStr;` in your code before invoking the `from_str` function. This import ensures
68/// that the `from_str` method is available for converting strings to `SHA1` objects.
69impl std::str::FromStr for SHA1 {
70 type Err = String;
71
72 fn from_str(s: &str) -> Result<Self, Self::Err> {
73 let mut h = SHA1::default();
74 if s.len() != 40 {
75 return Err("The length of the string is not 40".to_string());
76 }
77 let bytes = hex::decode(s).map_err(|e| e.to_string())?;
78 h.0.copy_from_slice(bytes.as_slice());
79 Ok(h)
80 }
81}
82
83/// Implementation of the `SHA1` struct.
84///
85/// The naming conventions for the methods in this implementation are designed to be intuitive and self-explanatory:
86///
87/// 1. `new` Prefix:
88/// Methods starting with `new` are used for computing an SHA-1 hash from given data, signifying the creation of
89/// a new `SHA1` instance. For example, `pub fn new(data: &Vec<u8>) -> SHA1` takes a byte vector and calculates its SHA-1 hash.
90///
91/// 2. `from` Prefix:
92/// Methods beginning with `from` are intended for creating a `SHA1` instance from an existing, pre-calculated value.
93/// This implies direct derivation of the `SHA1` object from the provided input. For instance, `pub fn from_bytes(bytes: &[u8]) -> SHA1`
94/// constructs a `SHA1` from a 20-byte array representing an SHA-1 hash.
95///
96/// 3. `to` Prefix:
97/// Methods with the `to` prefix are used for outputting the `SHA1` value in various formats. This prefix indicates a transformation or
98/// conversion of the `SHA1` instance into another representation. For example, `pub fn to_string(self) -> String` converts the SHA1
99/// value to a plain hexadecimal string, and `pub fn to_data(self) -> Vec<u8>` converts it into a byte vector. The `to` prefix
100/// thus serves as a clear indicator that the method is exporting or transforming the SHA1 value into a different format.
101///
102/// These method naming conventions (`new`, `from`, `to`) provide clarity and predictability in the API, making it easier for users
103/// to understand the intended use and functionality of each method within the `SHA1` struct.
104impl SHA1 {
105 // The size of the SHA-1 hash value in bytes
106 pub const SIZE: usize = 20;
107
108 /// Calculate the SHA-1 hash of the byte slice, then create a Hash value
109 pub fn new(data: &[u8]) -> SHA1 {
110 let h = sha1::Sha1::digest(data);
111 SHA1::from_bytes(h.as_ref())
112 }
113 /// Create a Hash from the object type and data
114 /// This function is used to create a SHA1 hash from the object type and data.
115 /// It constructs a byte vector that includes the object type, the size of the data,
116 /// and the data itself, and then computes the SHA1 hash of this byte vector.
117 ///
118 /// Hash compute <- {Object Type}+{ }+{Object Size(before compress)}+{\x00}+{Object Content(before compress)}
119 pub fn from_type_and_data(object_type: ObjectType, data: &[u8]) -> SHA1 {
120 let mut d: Vec<u8> = Vec::new();
121 d.extend(object_type.to_data().unwrap());
122 d.push(b' ');
123 d.extend(data.len().to_string().as_bytes());
124 d.push(b'\x00');
125 d.extend(data);
126 SHA1::new(&d)
127 }
128
129 /// Create Hash from a byte array, which is a 20-byte array already calculated
130 pub fn from_bytes(bytes: &[u8]) -> SHA1 {
131 let mut h = SHA1::default();
132 h.0.copy_from_slice(bytes);
133 h
134 }
135
136 /// Read the Hash value from the stream
137 /// This function will read exactly 20 bytes from the stream
138 pub fn from_stream(data: &mut impl io::Read) -> io::Result<SHA1> {
139 let mut h = SHA1::default();
140 data.read_exact(&mut h.0)?;
141 Ok(h)
142 }
143
144 /// Export sha1 value to String with the color
145 pub fn to_color_str(self) -> String {
146 self.to_string().red().bold().to_string()
147 }
148
149 /// Export sha1 value to a byte array
150 pub fn to_data(self) -> Vec<u8> {
151 self.0.to_vec()
152 }
153
154 /// [`core::fmt::Display`] is somewhat expensive,
155 /// use this hack to get a string more efficiently
156 pub fn _to_string(&self) -> String {
157 hex::encode(self.0)
158 }
159}
160
#[cfg(test)]
mod tests {
    use std::io::{BufReader, Read, Seek, SeekFrom};
    use std::str::FromStr;
    use std::{env, path::PathBuf};

    use crate::hash::SHA1;

    /// Hexadecimal form of the hash shared by the conversion tests below.
    const SAMPLE_HEX: &str = "8ab686eafeb1f44702738c8b0f24f2567c36da6d";

    /// Raw bytes of [`SAMPLE_HEX`].
    const SAMPLE_BYTES: [u8; 20] = [
        0x8a, 0xb6, 0x86, 0xea, 0xfe, 0xb1, 0xf4, 0x47, 0x02, 0x73, 0x8c, 0x8b, 0x0f, 0x24, 0xf2,
        0x56, 0x7c, 0x36, 0xda, 0x6d,
    ];

    /// Hashing known input yields the known SHA-1 digest.
    #[test]
    fn test_sha1_new() {
        // Example input
        let data = "Hello, world!".as_bytes();

        // Generate SHA1 hash from the input data
        let sha1 = SHA1::new(data);

        // Known SHA1 hash for "Hello, world!"
        let expected_sha1_hash = "943a702d06f34599aee1f8da8ef9f7296031d699";

        assert_eq!(sha1.to_string(), expected_sha1_hash);
    }

    /// Hashing empty input yields the well-known SHA-1 digest of the empty message.
    #[test]
    fn test_sha1_new_empty_input() {
        assert_eq!(
            SHA1::new(&[]).to_string(),
            "da39a3ee5e6b4b0d3255bfef95601890afd80709"
        );
    }

    /// The last 20 bytes of the pack file are its signature, which matches the file name.
    #[test]
    fn test_signature_without_delta() {
        let mut source = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        source.push("tests/data/packs/pack-1d0e6c14760c956c173ede71cb28f33d921e232f.pack");

        let f = std::fs::File::open(source).unwrap();
        let mut buffered = BufReader::new(f);

        buffered.seek(SeekFrom::End(-20)).unwrap();
        let mut buffer = vec![0; 20];
        buffered.read_exact(&mut buffer).unwrap();
        let signature = SHA1::from_bytes(buffer.as_ref());
        assert_eq!(
            signature.to_string(),
            "1d0e6c14760c956c173ede71cb28f33d921e232f"
        );
    }

    #[test]
    fn test_sha1_from_bytes() {
        let sha1 = SHA1::from_bytes(&SAMPLE_BYTES);

        assert_eq!(sha1.to_string(), SAMPLE_HEX);
    }

    /// `from_bytes` only accepts exactly 20 bytes and panics otherwise.
    #[test]
    #[should_panic]
    fn test_sha1_from_bytes_wrong_length_panics() {
        let _ = SHA1::from_bytes(&SAMPLE_BYTES[..19]);
    }

    #[test]
    fn test_from_stream() {
        let mut reader = std::io::Cursor::new(SAMPLE_BYTES);
        let sha1 = SHA1::from_stream(&mut reader).unwrap();
        assert_eq!(sha1.to_string(), SAMPLE_HEX);
    }

    /// A stream with fewer than 20 bytes is reported as an error instead of a partial hash.
    #[test]
    fn test_from_stream_short_input() {
        let mut reader = std::io::Cursor::new([0u8; 19]);
        let err = SHA1::from_stream(&mut reader).unwrap_err();
        assert_eq!(err.kind(), std::io::ErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_sha1_from_str() {
        // Unwrap instead of printing the error: a parse failure must fail the test.
        let hash = SHA1::from_str(SAMPLE_HEX).unwrap();

        assert_eq!(hash, SHA1(SAMPLE_BYTES));
        assert_eq!(hash.to_string(), SAMPLE_HEX);
    }

    #[test]
    fn test_sha1_from_str_rejects_wrong_length() {
        assert_eq!(
            SHA1::from_str("8ab686"),
            Err("The length of the string is not 40".to_string())
        );
        assert!(SHA1::from_str("").is_err());
    }

    #[test]
    fn test_sha1_from_str_rejects_non_hex() {
        // 40 characters long, but 'z' is not a hexadecimal digit.
        let not_hex = "zab686eafeb1f44702738c8b0f24f2567c36da6d";
        assert_eq!(not_hex.len(), 40);

        assert!(SHA1::from_str(not_hex).is_err());
    }

    #[test]
    fn test_sha1_to_string() {
        let hash = SHA1(SAMPLE_BYTES);

        assert_eq!(hash.to_string(), SAMPLE_HEX);
        // The allocation-only shortcut must agree with the `Display` output.
        assert_eq!(hash._to_string(), SAMPLE_HEX);
    }

    #[test]
    fn test_sha1_to_data() {
        // Unwrap instead of printing the error: a parse failure must fail the test.
        let hash = SHA1::from_str(SAMPLE_HEX).unwrap();

        assert_eq!(hash.to_data(), SAMPLE_BYTES.to_vec());
    }
}