Skip to main content

socket_patch_core/hash/
git_sha256.rs

1use sha2::{Digest, Sha256};
2use std::io;
3use tokio::io::AsyncReadExt;
4
5/// Compute Git-compatible SHA256 hash for a byte slice.
6///
7/// Git hashes objects as: SHA256("blob <size>\0" + content)
8pub fn compute_git_sha256_from_bytes(data: &[u8]) -> String {
9    let mut hasher = Sha256::new();
10    let header = format!("blob {}\0", data.len());
11    hasher.update(header.as_bytes());
12    hasher.update(data);
13    hex::encode(hasher.finalize())
14}
15
16/// Compute Git-compatible SHA256 hash from an async reader with known size.
17///
18/// This streams the content through the hasher without loading it all into memory.
19///
20/// The `size` is written into the Git object header *before* the body is read,
21/// so it must match the number of bytes the reader actually yields. If it does
22/// not (for example, the underlying file was truncated or extended between the
23/// time its size was measured and the time it was read), the resulting hash
24/// would correspond to no real Git object. Rather than silently return a
25/// corrupt hash, this function reports an [`io::Error`] when the byte count
26/// disagrees with `size`.
27pub async fn compute_git_sha256_from_reader<R: tokio::io::AsyncRead + Unpin>(
28    size: u64,
29    mut reader: R,
30) -> io::Result<String> {
31    let mut hasher = Sha256::new();
32    let header = format!("blob {}\0", size);
33    hasher.update(header.as_bytes());
34
35    let mut buf = [0u8; 8192];
36    let mut total: u64 = 0;
37    loop {
38        let n = reader.read(&mut buf).await?;
39        if n == 0 {
40            break;
41        }
42        hasher.update(&buf[..n]);
43        total += n as u64;
44    }
45
46    if total != size {
47        return Err(io::Error::new(
48            io::ErrorKind::InvalidData,
49            format!(
50                "git sha256: declared size {size} does not match {total} bytes read from stream"
51            ),
52        ));
53    }
54
55    Ok(hex::encode(hasher.finalize()))
56}
57
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain SHA-256 of `preimage`, hex-encoded, computed without going
    /// through the functions under test.
    fn raw_sha256_hex(preimage: &[u8]) -> String {
        use sha2::{Digest, Sha256};
        let mut reference = Sha256::new();
        reference.update(preimage);
        hex::encode(reference.finalize())
    }

    /// Run the streaming hasher over `body` while declaring `declared_size`
    /// in the blob header.
    async fn hash_stream(declared_size: u64, body: &[u8]) -> io::Result<String> {
        let source = tokio::io::BufReader::new(body);
        compute_git_sha256_from_reader(declared_size, source).await
    }

    /// Empty input still produces a full-length digest, and repeated calls
    /// return the same value.
    #[test]
    fn test_empty_content() {
        let first = compute_git_sha256_from_bytes(b"");
        // SHA256("blob 0\0") - Git-compatible hash of empty content
        assert_eq!(first.len(), 64);
        let second = compute_git_sha256_from_bytes(b"");
        assert_eq!(first, second);
    }

    /// Known-answer vectors for the Git SHA256 object format
    /// (`SHA256("blob <size>\0<content>")`). Unlike the reader-vs-bytes
    /// comparisons in this module, these compare against fixed digests, so a
    /// regression shared by both code paths is still caught.
    #[test]
    fn test_git_known_answer_vectors() {
        let vectors: [(&[u8], &str); 2] = [
            // `printf 'blob 0\0' | shasum -a 256`
            (
                &b""[..],
                "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813",
            ),
            // `printf 'blob 13\0Hello, World!' | shasum -a 256`
            (
                &b"Hello, World!"[..],
                "e118a058f018dda253bb692320c940091b15e4f19067e12fff110606a111f5da",
            ),
        ];

        for (input, expected) in vectors {
            assert_eq!(compute_git_sha256_from_bytes(input), expected);
        }
    }

    /// The helper must agree with a hand-built `blob 13\0...` preimage.
    #[test]
    fn test_hello_world() {
        let actual = compute_git_sha256_from_bytes(b"Hello, World!");
        assert_eq!(actual.len(), 64);

        // Manually compute expected: SHA256("blob 13\0Hello, World!")
        let expected = raw_sha256_hex(b"blob 13\0Hello, World!");
        assert_eq!(actual, expected);
    }

    /// Empty content hashes exactly like the bare header.
    #[test]
    fn test_known_vector() {
        // Known test vector: SHA256("blob 0\0")
        let expected = raw_sha256_hex(b"blob 0\0");
        assert_eq!(compute_git_sha256_from_bytes(b""), expected);
    }

    /// The streaming path and the in-memory path must agree.
    #[tokio::test]
    async fn test_async_reader_matches_sync() {
        let content = b"test content for async hashing";
        let from_bytes = compute_git_sha256_from_bytes(content);

        let from_stream = hash_stream(content.len() as u64, content)
            .await
            .unwrap();

        assert_eq!(from_bytes, from_stream);
    }

    /// Feed 50,000 bytes through the 8192-byte read buffer so the streaming
    /// loop iterates several times; guards against off-by-one or
    /// partial-read mistakes in the chunked update loop.
    #[tokio::test]
    async fn test_async_reader_multiple_chunks() {
        let content: Vec<u8> = (0..50_000u32).map(|i| (i % 251) as u8).collect();
        let from_bytes = compute_git_sha256_from_bytes(&content);

        let from_stream = hash_stream(content.len() as u64, &content)
            .await
            .unwrap();

        assert_eq!(from_bytes, from_stream);
    }

    /// Declaring more bytes than the stream holds (e.g. the file was
    /// truncated after being measured) must surface as an error rather than
    /// a silently-corrupt object id.
    #[tokio::test]
    async fn test_async_reader_size_too_large_errors() {
        let content = b"short";
        let outcome = hash_stream(content.len() as u64 + 100, content).await;

        let err = outcome.expect_err("size larger than stream must error");
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }

    /// Declaring fewer bytes than the stream holds (e.g. the file grew after
    /// being measured) must also be reported, since the header would
    /// disagree with the body.
    #[tokio::test]
    async fn test_async_reader_size_too_small_errors() {
        let content = b"this stream is longer than declared";
        let outcome = hash_stream(4, content).await;

        let err = outcome.expect_err("size smaller than stream must error");
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }
}