git_internal/internal/pack/
utils.rs

1use sha1::{Digest, Sha1};
2use std::fs;
3use std::io::{self, Read};
4use std::path::Path;
5
6use crate::hash::SHA1;
7use crate::internal::object::types::ObjectType;
8
/// Checks if the reader has reached EOF (end of file).
///
/// The check is performed by reading a single byte into a scratch buffer:
/// `Ok(0)` means no byte was available, i.e. the end of the stream was reached.
///
/// **Note**: the probe is destructive. When the reader is *not* at EOF, the
/// byte that was read is consumed and discarded, so the reader is advanced by
/// one byte.
///
/// A read that fails with [`io::ErrorKind::Interrupted`] is retried, because
/// that error kind signals a transient condition rather than a stream state.
/// Any other error is reported as "not at EOF".
///
/// # Arguments
///
/// * `reader` - The reader to check for EOF state.
///   It must implement the `std::io::Read` trait.
///
/// # Returns
///
/// `true` if the reader reached EOF, `false` otherwise (including on a
/// non-transient read error).
pub fn is_eof(reader: &mut dyn Read) -> bool {
    let mut probe = [0u8; 1];
    loop {
        match reader.read(&mut probe) {
            // Nothing could be read: the stream is exhausted.
            Ok(0) => return true,
            // Transient interruption: the read must be retried to learn the real state.
            Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
            // A byte was read, or the reader failed for another reason.
            _ => return false,
        }
    }
}
31
/// Reads one byte from `stream` and splits it into its 7-bit payload and its continuation flag.
///
/// The low 7 bits of the byte form the payload; the most significant bit tells the caller
/// whether further bytes belong to the same encoded value.
///
/// # Parameters
/// * `stream`: The stream from which the byte is read.
///
/// # Returns
/// Returns an `io::Result` containing a tuple. The first element is the value of the low
/// 7 bits, the second element is `true` when the most significant bit was set.
///
/// # Errors
/// Propagates the error of `read_exact`, e.g. when the stream holds no more bytes.
pub fn read_byte_and_check_continuation<R: Read>(stream: &mut R) -> io::Result<(u8, bool)> {
    /// Mask selecting the most significant bit of a byte.
    const CONTINUATION_FLAG: u8 = 0b1000_0000;

    let mut raw = [0u8; 1];
    stream.read_exact(&mut raw)?;
    let [octet] = raw;

    let payload = octet & !CONTINUATION_FLAG;
    let has_more = octet & CONTINUATION_FLAG != 0;
    Ok((payload, has_more))
}
63
64/// Reads bytes from the stream and parses the first byte for type and size.
65/// Subsequent bytes are read as size bytes and are processed as variable-length
66/// integer in little-endian order. The function returns the type and the computed size.
67///
68/// # Parameters
69/// * `stream`: The stream from which the bytes are read.
70/// * `offset`: The offset of the stream.
71///
72/// # Returns
73/// Returns an `io::Result` containing a tuple of the type and the computed size.
74///
75pub fn read_type_and_varint_size<R: Read>(
76    stream: &mut R,
77    offset: &mut usize,
78) -> io::Result<(u8, usize)> {
79    let (first_byte, continuation) = read_byte_and_check_continuation(stream)?;
80
81    // Increment the offset by one byte
82    *offset += 1;
83
84    // Extract the type (bits 2, 3, 4 of the first byte)
85    let type_bits = (first_byte & 0b0111_0000) >> 4;
86
87    // Initialize size with the last 4 bits of the first byte
88    let mut size: u64 = (first_byte & 0b0000_1111) as u64;
89    let mut shift = 4; // Next byte will shift by 4 bits
90
91    let mut more_bytes = continuation;
92    while more_bytes {
93        let (next_byte, continuation) = read_byte_and_check_continuation(stream)?;
94        // Increment the offset by one byte
95        *offset += 1;
96
97        size |= (next_byte as u64) << shift;
98        shift += 7; // Each subsequent byte contributes 7 more bits
99        more_bytes = continuation;
100    }
101
102    Ok((type_bits, size as usize))
103}
104
/// Reads a variable-length integer (VarInt) encoded in little-endian format from a source implementing the Read trait.
///
/// The VarInt encoding uses the most significant bit (MSB) of each byte as a continuation bit.
/// The continuation bit being 1 indicates that there are following bytes.
/// The actual integer value is encoded in the remaining 7 bits of each byte, with the
/// least significant group stored first.
///
/// # Parameters
/// * `reader`: A source implementing the Read trait (e.g., file, network stream).
///
/// # Returns
/// Returns a `Result` containing either:
/// * A tuple of the decoded `u64` value and the number of bytes read (`offset`).
/// * An `io::Error` in case of any reading error or if the VarInt is too long.
///
/// # Errors
/// * Propagates any error from reading the source.
/// * Returns `io::ErrorKind::InvalidData` when the encoded value does not fit into a `u64`,
///   either because there are too many bytes or because the last group carries bits that
///   would land above bit 63.
pub fn read_varint_le<R: Read>(reader: &mut R) -> io::Result<(u64, usize)> {
    // The decoded value
    let mut value: u64 = 0;
    // Bit position at which the next 7-bit group is placed
    let mut shift: u32 = 0;
    // Number of bytes read
    let mut offset = 0;

    loop {
        let mut buf = [0u8; 1];
        reader.read_exact(&mut buf)?;
        offset += 1;

        let byte = buf[0];
        let group = u64::from(byte & 0x7F);

        // `checked_shl` catches a shift of 64 or more; the round-trip comparison catches
        // payload bits that would be shifted out past bit 63 (possible at shift 63).
        let placed = group
            .checked_shl(shift)
            .filter(|&shifted| shifted >> shift == group)
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "VarInt too long"))?;
        value |= placed;

        // A cleared MSB marks the last byte.
        if byte & 0x80 == 0 {
            return Ok((value, offset));
        }

        shift += 7;
    }
}
161
162/// The offset for an OffsetDelta object(big-endian order)
163/// # Arguments
164///
165/// * `stream`: Input Data Stream to read
166/// # Returns
167/// * (`delta_offset`(unsigned), `consume`)
168pub fn read_offset_encoding<R: Read>(stream: &mut R) -> io::Result<(u64, usize)> {
169    // Like the object length, the offset for an OffsetDelta object
170    // is stored in a variable number of bytes,
171    // with the most significant bit of each byte indicating whether more bytes follow.
172    // However, the object length encoding allows redundant values,
173    // e.g. the 7-bit value [n] is the same as the 14- or 21-bit values [n, 0] or [n, 0, 0].
174    // Instead, the offset encoding adds 1 to the value of each byte except the least significant one.
175    // And just for kicks, the bytes are ordered from *most* to *least* significant.
176    let mut value = 0;
177    let mut offset = 0;
178    loop {
179        let (byte_value, more_bytes) = read_byte_and_check_continuation(stream)?;
180        offset += 1;
181        value = (value << 7) | byte_value as u64;
182        if !more_bytes {
183            return Ok((value, offset));
184        }
185
186        value += 1; //important!: for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) to the result
187    }
188}
189
/// Reads exactly `N` bytes from `stream` and returns them as a fixed-size array.
///
/// # Errors
/// Propagates the error of `read_exact`, e.g. when fewer than `N` bytes are available.
#[inline]
pub fn read_bytes<R: Read, const N: usize>(stream: &mut R) -> io::Result<[u8; N]> {
    let mut buffer = [0u8; N];
    stream.read_exact(&mut buffer).map(|()| buffer)
}
199
/// Reads a partial integer from a stream. (little-endian order)
///
/// The lowest `bytes` bits of `present_bytes` form a presence mask: bit `i` tells whether
/// byte `i` of the integer is stored in the stream. Bytes that are not present contribute
/// zero. The mask is shifted right by one position for every byte index that was handled.
///
/// # Arguments
///
/// * `stream` - A mutable reference to a readable stream.
/// * `bytes` - The number of byte positions to process.
/// * `present_bytes` - A mutable reference to the presence mask; it is shifted right as
///   positions are processed.
///
/// # Returns
///
/// This function returns a result of type `io::Result<usize>`. If the operation is successful, the
/// assembled integer is returned as `Ok(value)`. Otherwise, the `io::Error` raised while reading
/// from the stream is returned.
pub fn read_partial_int<R: Read>(
    stream: &mut R,
    bytes: u8,
    present_bytes: &mut u8,
) -> io::Result<usize> {
    let mut assembled = 0usize;

    for position in 0..bytes {
        // The lowest mask bit belongs to the current byte position.
        if *present_bytes & 1 == 1 {
            let mut slot = [0u8; 1];
            stream.read_exact(&mut slot)?;

            // Place the byte at its little-endian position.
            assembled |= usize::from(slot[0]) << (position * 8);
        }

        // Expose the presence bit of the next position.
        *present_bytes >>= 1;
    }

    Ok(assembled)
}
237
238/// Reads the base size and result size of a delta object from the given stream.
239///
240/// **Note**: The stream MUST be positioned at the start of the delta object.
241///
242/// The base size and result size are encoded as variable-length integers in little-endian order.
243///
244/// The base size is the size of the base object, and the result size is the size of the result object.
245///
246/// # Parameters
247/// * `stream`: The stream from which the sizes are read.
248///
249/// # Returns
250/// Returns a tuple containing the base size and result size.
251///
252pub fn read_delta_object_size<R: Read>(stream: &mut R) -> io::Result<(usize, usize)> {
253    let base_size = read_varint_le(stream)?.0 as usize;
254    let result_size = read_varint_le(stream)?.0 as usize;
255    Ok((base_size, result_size))
256}
257
258/// Calculate the SHA1 hash of the given object.
259/// <br> "`<type> <size>\0<content>`"
260/// <br> data: The decompressed content of the object
261pub fn calculate_object_hash(obj_type: ObjectType, data: &Vec<u8>) -> SHA1 {
262    let mut hash = Sha1::new();
263    // Header: "<type> <size>\0"
264    hash.update(obj_type.to_bytes());
265    hash.update(b" ");
266    hash.update(data.len().to_string());
267    hash.update(b"\0");
268
269    // Decompressed data(raw content)
270    hash.update(data);
271
272    let re: [u8; 20] = hash.finalize().into();
273    SHA1(re)
274}
/// Create an empty directory or clear the existing directory.
///
/// When `path` already exists it is removed recursively first; afterwards the directory
/// (including any missing parents) is created again.
///
/// # Errors
/// Propagates the error of `fs::remove_dir_all` or `fs::create_dir_all`.
pub fn create_empty_dir<P: AsRef<Path>>(path: P) -> io::Result<()> {
    let target = path.as_ref();

    // Remove the whole directory tree if it is already there.
    if target.exists() {
        fs::remove_dir_all(target)?;
    }

    // Create the directory again, now empty.
    fs::create_dir_all(target)
}
286
/// Count the number of files in a directory and its subdirectories.
///
/// Every entry for which `Path::is_dir` is true is descended into recursively; every
/// other entry counts as one file. Directories themselves are not counted.
///
/// # Errors
/// Propagates the first error raised while listing a directory or reading an entry.
pub fn count_dir_files(path: &Path) -> io::Result<usize> {
    fs::read_dir(path)?.try_fold(0usize, |total, entry| -> io::Result<usize> {
        let child = entry?.path();
        let contribution = if child.is_dir() {
            count_dir_files(&child)?
        } else {
            1
        };
        Ok(total + contribution)
    })
}
301
/// Measures how long a block of code takes to execute.
///
/// The block is evaluated once; afterwards `"<msg>: <elapsed>"` is emitted through
/// `tracing::info!` and the value of the block becomes the value of the macro invocation.
/// The `tracing` crate must be resolvable at the call site.
#[macro_export]
macro_rules! time_it {
    ($msg:expr, $block:block) => {{
        let timer = ::std::time::Instant::now();
        let output = $block;
        // Capture the duration before logging so that logging itself is not measured.
        let took = timer.elapsed();
        tracing::info!("{}: {:?}", $msg, took);
        output
    }};
}
314
/// Unit tests for the pack reading helpers of this module.
#[cfg(test)]
mod tests {
    use crate::internal::object::types::ObjectType;
    use std::io;
    use std::io::Cursor;
    use std::io::Read;

    use crate::internal::pack::utils::*;

    #[test]
    fn test_calc_obj_hash() {
        let hash = calculate_object_hash(ObjectType::Blob, &b"a".to_vec());
        assert_eq!(hash.to_string(), "2e65efe2a145dda7ee51d1741299f848e5bf752e");
    }

    #[test]
    fn eof() {
        let mut reader = Cursor::new(&b""[..]);
        assert!(is_eof(&mut reader));
    }

    #[test]
    fn not_eof() {
        let mut reader = Cursor::new(&b"abc"[..]);
        assert!(!is_eof(&mut reader));
    }

    #[test]
    fn eof_midway() {
        // Two of three bytes are consumed, so one byte is still left to read.
        let mut reader = Cursor::new(&b"abc"[..]);
        reader.read_exact(&mut [0; 2]).unwrap();
        assert!(!is_eof(&mut reader));
    }

    // The EOF probe reads a byte, so a successful "not EOF" check advances the reader.
    #[test]
    fn eof_probe_consumes_a_byte() {
        let mut reader = Cursor::new(&b"a"[..]);
        assert!(!is_eof(&mut reader));
        assert!(is_eof(&mut reader));
    }

    #[test]
    fn reader_error() {
        struct BrokenReader;
        impl Read for BrokenReader {
            fn read(&mut self, _: &mut [u8]) -> io::Result<usize> {
                Err(io::Error::other("error"))
            }
        }

        let mut reader = BrokenReader;
        assert!(!is_eof(&mut reader));
    }

    // Test case for a byte without a continuation bit (most significant bit is 0)
    #[test]
    fn test_read_byte_and_check_continuation_no_continuation() {
        let data = [0b0101_0101]; // 85 in binary, highest bit is 0
        let mut cursor = Cursor::new(data);
        let (value, more_bytes) = read_byte_and_check_continuation(&mut cursor).unwrap();

        assert_eq!(value, 85); // Expected value is 85
        assert!(!more_bytes); // No more bytes are expected
    }

    // Test case for a byte with a continuation bit (most significant bit is 1)
    #[test]
    fn test_read_byte_and_check_continuation_with_continuation() {
        let data = [0b1010_1010]; // 170 in binary, highest bit is 1
        let mut cursor = Cursor::new(data);
        let (value, more_bytes) = read_byte_and_check_continuation(&mut cursor).unwrap();

        assert_eq!(value, 42); // Expected value is 42 (170 - 128)
        assert!(more_bytes); // More bytes are expected
    }

    // Test cases for edge values, like the minimum and maximum byte values
    #[test]
    fn test_read_byte_and_check_continuation_edge_cases() {
        // Test the minimum value (0)
        let data = [0b0000_0000];
        let mut cursor = Cursor::new(data);
        let (value, more_bytes) = read_byte_and_check_continuation(&mut cursor).unwrap();

        assert_eq!(value, 0); // Expected value is 0
        assert!(!more_bytes); // No more bytes are expected

        // Test the maximum value (255)
        let data = [0b1111_1111];
        let mut cursor = Cursor::new(data);
        let (value, more_bytes) = read_byte_and_check_continuation(&mut cursor).unwrap();

        assert_eq!(value, 127); // Expected value is 127 (255 - 128)
        assert!(more_bytes); // More bytes are expected
    }

    // Test with a single byte where msb is 0 (no continuation)
    #[test]
    fn test_single_byte_no_continuation() {
        let data = [0b0101_0101]; // Type: 5 (101), Size: 5 (0101)
        let mut offset: usize = 0;
        let mut cursor = Cursor::new(data);
        let (type_bits, size) = read_type_and_varint_size(&mut cursor, &mut offset).unwrap();

        assert_eq!(offset, 1); // Offset is 1
        assert_eq!(type_bits, 5); // Expected type is 5
        assert_eq!(size, 5); // Expected size is 5
    }

    // Test with multiple bytes, where continuation occurs
    #[test]
    fn test_multiple_bytes_with_continuation() {
        // Type: 5 (101), Sizes: 5 (0101), 3 (0000011) in little-endian order
        let data = [0b1101_0101, 0b0000_0011]; // Second byte's msb is 0
        let mut offset: usize = 0;
        let mut cursor = Cursor::new(data);
        let (type_bits, size) = read_type_and_varint_size(&mut cursor, &mut offset).unwrap();

        assert_eq!(offset, 2); // Offset is 2
        assert_eq!(type_bits, 5); // Expected type is 5
        // Expected size 000000110101
        // 110101  = 1 * 2^5 + 1 * 2^4 + 0 * 2^3 + 1 * 2^2 + 0 * 2^1 + 1 * 2^0= 53
        assert_eq!(size, 53);
    }

    // Test that decoding stops after the first byte when its continuation bit is clear,
    // even though more data follows in the stream
    #[test]
    fn test_edge_case_size_spread_across_bytes() {
        // Type: 1 (001), Size: 15 (1111)
        // First byte's msb is 0, so the second byte must not be consumed
        let data = [0b0001_1111, 0b0000_0010];
        let mut offset: usize = 0;
        let mut cursor = Cursor::new(data);
        let (type_bits, size) = read_type_and_varint_size(&mut cursor, &mut offset).unwrap();

        assert_eq!(offset, 1); // Offset is 1
        assert_eq!(type_bits, 1); // Expected type is 1
        // Expected size is 15
        assert_eq!(size, 15);
    }

    #[test]
    fn test_read_varint_le_single_byte() {
        // Single byte: 0x05 (binary: 0000 0101)
        // Represents the value 5 with no continuation bit set.
        let data = vec![0x05];
        let mut cursor = Cursor::new(data);
        let (value, offset) = read_varint_le(&mut cursor).unwrap();

        assert_eq!(value, 5);
        assert_eq!(offset, 1);
    }

    #[test]
    fn test_read_varint_le_multiple_bytes() {
        // Two bytes: 0x85, 0x01 (binary: 1000 0101, 0000 0001)
        // Represents the value 133. First byte has the continuation bit set.
        let data = vec![0x85, 0x01];
        let mut cursor = Cursor::new(data);
        let (value, offset) = read_varint_le(&mut cursor).unwrap();

        assert_eq!(value, 133);
        assert_eq!(offset, 2);
    }

    #[test]
    fn test_read_varint_le_large_number() {
        // Five bytes: 0xFF, 0xFF, 0xFF, 0xFF, 0x0F
        // (binary: 1111 1111, 1111 1111, 1111 1111, 1111 1111, 0000 1111)
        // The first four bytes contribute 4 * 7 = 28 one-bits and the last byte 4 more,
        // so the value is 0xFFFFFFFF (4,294,967,295).
        // All continuation bits are set except in the last byte.
        let data = vec![0xFF, 0xFF, 0xFF, 0xFF, 0xF];
        let mut cursor = Cursor::new(data);
        let (value, offset) = read_varint_le(&mut cursor).unwrap();

        assert_eq!(value, 0xFFFFFFFF);
        assert_eq!(offset, 5);
    }

    #[test]
    fn test_read_varint_le_zero() {
        // Single byte: 0x00 (binary: 0000 0000)
        // Represents the value 0 with no continuation bit set.
        let data = vec![0x00];
        let mut cursor = Cursor::new(data);
        let (value, offset) = read_varint_le(&mut cursor).unwrap();

        assert_eq!(value, 0);
        assert_eq!(offset, 1);
    }

    #[test]
    fn test_read_varint_le_too_long() {
        // Eleven continuation bytes push the shift past 63 bits.
        let data = vec![
            0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01,
        ];
        let mut cursor = Cursor::new(data);
        let result = read_varint_le(&mut cursor);

        assert!(result.is_err());
    }

    #[test]
    fn test_read_varint_le_truncated_input() {
        // The continuation bit promises another byte that is not there.
        let data = vec![0x80];
        let mut cursor = Cursor::new(data);
        let err = read_varint_le(&mut cursor).unwrap_err();

        assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_read_offset_encoding() {
        // First byte: payload 85 with continuation -> (85 + 1) = 86
        // Second byte: payload 5 -> (86 << 7) | 5 = 11013
        let data: Vec<u8> = vec![0b_1101_0101, 0b_0000_0101];
        let mut cursor = Cursor::new(data);
        let result = read_offset_encoding(&mut cursor);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), (11013, 2));
    }

    #[test]
    fn test_read_offset_encoding_single_byte() {
        let data: Vec<u8> = vec![0b_0000_0101];
        let mut cursor = Cursor::new(data);
        assert_eq!(read_offset_encoding(&mut cursor).unwrap(), (5, 1));
    }

    #[test]
    fn test_read_bytes() {
        let mut cursor = Cursor::new(vec![1u8, 2, 3, 4]);
        let head: [u8; 2] = read_bytes(&mut cursor).unwrap();
        assert_eq!(head, [1, 2]);

        // Only two bytes are left, so asking for three must fail.
        let err = read_bytes::<_, 3>(&mut cursor).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_read_partial_int() {
        // Bytes 0 and 2 are present, byte 1 and byte 3 are implicit zeros.
        let mut present_bytes = 0b0000_0101;
        let mut cursor = Cursor::new(vec![0x34u8, 0x12]);
        let value = read_partial_int(&mut cursor, 4, &mut present_bytes).unwrap();

        assert_eq!(value, 0x12_0034);
        assert_eq!(present_bytes, 0); // All four mask bits were shifted out
    }

    #[test]
    fn test_read_partial_int_keeps_remaining_mask_bits() {
        // Low nibble: no offset bytes present; bits 4-6: three size bytes present.
        let mut present_bytes = 0b0111_0000;
        let mut cursor = Cursor::new(vec![0xAAu8, 0xBB, 0xCC]);

        let first = read_partial_int(&mut cursor, 4, &mut present_bytes).unwrap();
        assert_eq!(first, 0);
        assert_eq!(present_bytes, 0b0000_0111);

        let second = read_partial_int(&mut cursor, 3, &mut present_bytes).unwrap();
        assert_eq!(second, 0xCC_BBAA);
        assert_eq!(present_bytes, 0);
    }

    #[test]
    fn test_read_delta_object_size() {
        // Base size 133 (0x85, 0x01) followed by result size 5 (0x05).
        let mut cursor = Cursor::new(vec![0x85u8, 0x01, 0x05]);
        let (base_size, result_size) = read_delta_object_size(&mut cursor).unwrap();

        assert_eq!(base_size, 133);
        assert_eq!(result_size, 5);
    }
}
513        assert_eq!(result.unwrap(), (11013, 2));
514    }
515}