git_internal/internal/object/
utils.rs

1//! Low-level helpers shared by object encode/decode routines: varint readers, type/size parsing,
2//! and thin zlib compression wrappers tuned for Git object formats.
3
4use std::io::{self, Read, Write};
5
6use flate2::{Compression, write::ZlibEncoder};
7
/// Number of bits used to represent the object type.
const TYPE_BITS: u8 = 3;
/// Number of payload bits carried by each byte of the variable-length integer encoding.
const VAR_INT_ENCODING_BITS: u8 = 7;
/// Number of size bits left in a byte once the type bits are taken out of its payload.
const TYPE_BYTE_SIZE_BITS: u8 = VAR_INT_ENCODING_BITS - TYPE_BITS;
/// Mask of the bit directly above the payload bits; when set, another encoded byte follows.
const VAR_INT_CONTINUE_FLAG: u8 = 1 << VAR_INT_ENCODING_BITS;
12
/// Decodes the textual size field of a serialized Git object into a `usize`.
///
/// The bytes are first validated as UTF-8 and the resulting text is then parsed as a `usize`.
/// Callers typically compare the result against the actual length of the object's data to
/// check integrity.
///
/// # Parameters
/// * `bytes`: the raw bytes of the size field.
///
/// # Returns
/// * `Ok(usize)`: the parsed size.
/// * `Err(Box<dyn std::error::Error>)`: the boxed cause of the failure.
///
/// # Errors
/// * `Utf8Error` when `bytes` is not valid UTF-8.
/// * `ParseIntError` when the text is not a valid `usize` (this includes an empty slice).
pub fn parse_size_from_bytes(bytes: &[u8]) -> Result<usize, Box<dyn std::error::Error>> {
    let text = std::str::from_utf8(bytes)?;
    let size: usize = text.parse()?;
    Ok(size)
}
36
/// Returns `value` with everything except its lowest `bits` bits cleared.
///
/// `bits` must be smaller than the bit width of `usize`; larger values overflow the shift
/// used to build the mask.
fn keep_bits(value: usize, bits: u8) -> usize {
    // A run of `bits` ones in the least-significant positions.
    let mask = (1 << bits) - 1;
    value & mask
}
42/// Read the first few fields of the object and parse
43///
44pub fn read_type_and_size<R: Read>(stream: &mut R) -> io::Result<(u8, usize)> {
45    // Object type and uncompressed pack data size
46    // are stored in a "size-encoding" variable-length integer.
47    // Bits 4 through 6 store the type and the remaining bits store the size.
48    let value = read_size_encoding(stream)?;
49    let object_type = keep_bits(value >> TYPE_BYTE_SIZE_BITS, TYPE_BITS) as u8;
50    let size = keep_bits(value, TYPE_BYTE_SIZE_BITS)
51        | (value >> VAR_INT_ENCODING_BITS << TYPE_BYTE_SIZE_BITS);
52
53    Ok((object_type, size))
54}
55
56/// Read the type and size of the object
57///
58pub fn read_size_encoding<R: Read>(stream: &mut R) -> io::Result<usize> {
59    let mut value = 0;
60    let mut length = 0;
61
62    loop {
63        let (byte_value, more_bytes) = read_var_int_byte(stream).unwrap();
64        value |= (byte_value as usize) << length;
65        if !more_bytes {
66            return Ok(value);
67        }
68
69        length += VAR_INT_ENCODING_BITS;
70    }
71}
72
73/// Returns whether the first bit of u8 is 1 and returns the 7-bit truth value
74///
75pub fn read_var_int_byte<R: Read>(stream: &mut R) -> io::Result<(u8, bool)> {
76    let [byte] = read_bytes(stream)?;
77    let value = byte & !VAR_INT_CONTINUE_FLAG;
78    let more_bytes = byte & VAR_INT_CONTINUE_FLAG != 0;
79
80    Ok((value, more_bytes))
81}
82
/// Reads exactly `N` bytes from `stream` and returns them as a fixed-size array.
///
/// # Errors
/// Returns the error reported by `Read::read_exact`, e.g. when the stream ends before `N`
/// bytes are available.
#[inline]
pub fn read_bytes<R: Read, const N: usize>(stream: &mut R) -> io::Result<[u8; N]> {
    let mut buffer = [0u8; N];
    stream.read_exact(&mut buffer).map(|()| buffer)
}
92
93pub fn compress_zlib(data: &[u8]) -> io::Result<Vec<u8>> {
94    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
95    encoder.write_all(data)?;
96    let compressed_data = encoder.finish()?;
97    Ok(compressed_data)
98}
99
#[cfg(test)]
mod tests {
    use super::parse_size_from_bytes;

    /// A size rendered as decimal text must parse back to the same `usize`.
    #[test]
    fn test_parse_size_from_bytes() -> Result<(), Box<dyn std::error::Error>> {
        let expected: usize = 12345;
        let encoded = expected.to_string().into_bytes();

        let decoded = parse_size_from_bytes(&encoded)?;

        assert_eq!(expected, decoded);
        Ok(())
    }
}