Skip to main content

sqry_core/
hash.rs

1//! BLAKE3 hashing utilities for cache module.
2//!
3//! This module provides fast, cryptographic hashing functions for cache key generation
4//! and content verification. BLAKE3 is significantly faster than SHA-256 (~1 GB/s)
5//! while maintaining cryptographic security properties.
6//!
7//! # Usage
8//!
9//! ```no_run
10//! use sqry_core::hash::{hash_file, hash_bytes, Blake3Hash};
11//! use std::path::Path;
12//!
13//! // Hash file contents
14//! let file_hash = hash_file(Path::new("example.rs"))?;
15//!
16//! // Hash byte slice
17//! let content_hash = hash_bytes(b"hello world");
18//!
19//! // Use in cache keys
20//! println!("File hash: {}", file_hash);
21//! # Ok::<(), std::io::Error>(())
22//! ```
23
24use std::fmt;
25use std::fs::File;
26use std::io::{self, Read};
27use std::path::Path;
28
29use crate::config::buffers::parse_buffer_size;
30
/// A 32-byte (256-bit) BLAKE3 digest.
///
/// This is a newtype wrapping `[u8; 32]` (not a type alias), so hash values used
/// in cache keys and headers are distinguishable from arbitrary byte arrays at
/// the type level. The inner array is private; convert to and from raw bytes
/// with [`Blake3Hash::from_bytes`] and [`Blake3Hash::as_bytes`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Blake3Hash([u8; 32]);
37
38impl Blake3Hash {
39    /// Create a hash from a 32-byte array.
40    ///
41    /// # Examples
42    ///
43    /// ```
44    /// use sqry_core::hash::Blake3Hash;
45    ///
46    /// let bytes = [0u8; 32];
47    /// let hash = Blake3Hash::from_bytes(bytes);
48    /// ```
49    #[inline]
50    #[must_use]
51    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
52        Self(bytes)
53    }
54
55    /// Get the hash as a byte slice.
56    ///
57    /// # Examples
58    ///
59    /// ```
60    /// use sqry_core::hash::Blake3Hash;
61    ///
62    /// let hash = Blake3Hash::from_bytes([0u8; 32]);
63    /// let bytes: &[u8] = hash.as_bytes();
64    /// assert_eq!(bytes.len(), 32);
65    /// ```
66    #[inline]
67    #[must_use]
68    pub const fn as_bytes(&self) -> &[u8; 32] {
69        &self.0
70    }
71
72    /// Convert the hash to a hex string.
73    ///
74    /// # Examples
75    ///
76    /// ```
77    /// use sqry_core::hash::Blake3Hash;
78    ///
79    /// let hash = Blake3Hash::from_bytes([0u8; 32]);
80    /// let hex = hash.to_hex();
81    /// assert_eq!(hex.len(), 64); // 32 bytes * 2 hex digits
82    /// ```
83    #[must_use]
84    pub fn to_hex(&self) -> String {
85        hex::encode(self.0)
86    }
87
88    /// Parse a hash from a hex string.
89    ///
90    /// # Errors
91    ///
92    /// Returns an error if the string is not valid hex or not 64 characters long.
93    ///
94    /// # Examples
95    ///
96    /// ```
97    /// use sqry_core::hash::Blake3Hash;
98    ///
99    /// let hex = "0000000000000000000000000000000000000000000000000000000000000000";
100    /// let hash = Blake3Hash::from_hex(hex)?;
101    /// # Ok::<(), Box<dyn std::error::Error>>(())
102    /// ```
103    pub fn from_hex(hex_str: &str) -> Result<Self, hex::FromHexError> {
104        let mut bytes = [0u8; 32];
105        hex::decode_to_slice(hex_str, &mut bytes)?;
106        Ok(Self(bytes))
107    }
108}
109
110impl fmt::Display for Blake3Hash {
111    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112        write!(f, "{}", self.to_hex())
113    }
114}
115
116impl serde::Serialize for Blake3Hash {
117    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
118    where
119        S: serde::Serializer,
120    {
121        if serializer.is_human_readable() {
122            serializer.serialize_str(&self.to_hex())
123        } else {
124            // For binary formats, serialize as a fixed-size array
125            self.0.serialize(serializer)
126        }
127    }
128}
129
130impl<'de> serde::Deserialize<'de> for Blake3Hash {
131    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
132    where
133        D: serde::Deserializer<'de>,
134    {
135        if deserializer.is_human_readable() {
136            let hex_str = String::deserialize(deserializer)?;
137            Self::from_hex(&hex_str).map_err(serde::de::Error::custom)
138        } else {
139            // For binary formats, deserialize as a fixed-size array
140            let bytes = <[u8; 32]>::deserialize(deserializer)?;
141            Ok(Self(bytes))
142        }
143    }
144}
145
146/// Hash the contents of a file using BLAKE3.
147///
148/// This function reads the file in chunks to avoid loading large files
149/// entirely into memory.
150///
151/// # Errors
152///
153/// Returns an I/O error if the file cannot be read.
154///
155/// # Examples
156///
157/// ```no_run
158/// use sqry_core::hash::hash_file;
159/// use std::path::Path;
160///
161/// let hash = hash_file(Path::new("example.rs"))?;
162/// println!("File hash: {}", hash);
163/// # Ok::<(), std::io::Error>(())
164/// ```
165pub fn hash_file(path: &Path) -> io::Result<Blake3Hash> {
166    let mut file = File::open(path)?;
167    let mut hasher = blake3::Hasher::new();
168
169    // Read in chunks for efficiency (respects SQRY_PARSE_BUFFER env var)
170    let mut buffer = vec![0u8; parse_buffer_size()];
171    loop {
172        let bytes_read = file.read(&mut buffer)?;
173        if bytes_read == 0 {
174            break;
175        }
176        hasher.update(&buffer[..bytes_read]);
177    }
178
179    let hash = hasher.finalize();
180    Ok(Blake3Hash::from_bytes(*hash.as_bytes()))
181}
182
183/// Hash a byte slice using BLAKE3.
184///
185/// This is a convenience function for hashing in-memory data.
186///
187/// # Examples
188///
189/// ```
190/// use sqry_core::hash::hash_bytes;
191///
192/// let hash = hash_bytes(b"hello world");
193/// println!("Content hash: {}", hash);
194/// ```
195#[inline]
196#[must_use]
197pub fn hash_bytes(content: &[u8]) -> Blake3Hash {
198    let hash = blake3::hash(content);
199    Blake3Hash::from_bytes(*hash.as_bytes())
200}
201
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Known BLAKE3 digest of the empty input, hex-encoded.
    const EMPTY_INPUT_HEX: &str =
        "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262";

    /// Creates a temporary file containing `contents`, flushed so that it can
    /// be re-opened and read back by path.
    fn temp_file_with(contents: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(contents)?;
        file.flush()?;
        Ok(file)
    }

    #[test]
    fn test_hash_bytes_deterministic() {
        let payload = b"hello world";

        assert_eq!(
            hash_bytes(payload),
            hash_bytes(payload),
            "Hashing same content should produce same hash"
        );
    }

    #[test]
    fn test_hash_bytes_different_content() {
        let first = hash_bytes(b"hello world");
        let second = hash_bytes(b"hello sqry");

        assert_ne!(
            first, second,
            "Different content should produce different hashes"
        );
    }

    #[test]
    fn test_hash_empty_content() {
        let digest = hash_bytes(b"");

        assert_eq!(digest.to_hex(), EMPTY_INPUT_HEX);
    }

    #[test]
    fn test_hash_file() -> io::Result<()> {
        let payload: &[u8] = b"test content for hashing";
        let fixture = temp_file_with(payload)?;

        let from_file = hash_file(fixture.path())?;

        // Streaming the file must agree with hashing the same bytes in memory.
        let from_memory = hash_bytes(payload);
        assert_eq!(from_file, from_memory);

        Ok(())
    }

    #[test]
    fn test_hash_file_large() -> io::Result<()> {
        // 100 KB of data, intended to span more than one read of the chunk
        // buffer (the actual buffer size comes from `parse_buffer_size()`).
        let payload = vec![b'x'; 100_000];
        let fixture = temp_file_with(&payload)?;

        let from_file = hash_file(fixture.path())?;
        let from_memory = hash_bytes(&payload);

        assert_eq!(
            from_file, from_memory,
            "Large file hash should match bytes hash"
        );

        Ok(())
    }

    #[test]
    fn test_hash_file_nonexistent() {
        let missing = Path::new("/nonexistent/file.txt");

        assert!(
            hash_file(missing).is_err(),
            "Hashing nonexistent file should return error"
        );
    }

    #[test]
    fn test_blake3hash_hex_roundtrip() {
        let digest = hash_bytes(b"test");
        let decoded = Blake3Hash::from_hex(&digest.to_hex()).unwrap();

        assert_eq!(digest, decoded, "Hex encoding/decoding should roundtrip");
    }

    #[test]
    fn test_blake3hash_display() {
        let digest = hash_bytes(b"test");

        assert_eq!(
            digest.to_string(),
            digest.to_hex(),
            "Display should match to_hex()"
        );
    }

    #[test]
    fn test_blake3hash_from_hex_invalid() {
        let malformed_inputs = [
            // Too short
            "abc",
            // Right length, but not hex digits
            "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz",
            // Wrong length (62 chars, not 64)
            "00000000000000000000000000000000000000000000000000000000000000",
        ];

        for malformed in malformed_inputs {
            assert!(Blake3Hash::from_hex(malformed).is_err());
        }
    }

    #[test]
    fn test_blake3hash_serde_json() {
        let digest = hash_bytes(b"test");

        // JSON is a human-readable format, so the hex form must appear in it.
        let encoded = serde_json::to_string(&digest).unwrap();
        assert!(encoded.contains(&digest.to_hex()));

        // Decoding the JSON must give back the same hash.
        let decoded: Blake3Hash = serde_json::from_str(&encoded).unwrap();
        assert_eq!(digest, decoded);
    }

    #[test]
    fn test_blake3hash_serde_postcard() {
        let digest = hash_bytes(b"test");

        // Postcard is a binary format. Only a lower bound on the encoded size
        // is checked here; the roundtrip below is the important part.
        let encoded = postcard::to_allocvec(&digest).unwrap();
        assert!(
            encoded.len() >= 32,
            "Postcard should serialize at least the 32 hash bytes"
        );

        let decoded: Blake3Hash = postcard::from_bytes(&encoded).unwrap();
        assert_eq!(
            digest, decoded,
            "Roundtrip serialization should preserve hash"
        );
    }

    #[test]
    fn test_known_blake3_vectors() {
        // Test against known BLAKE3 test vectors
        // Source: https://github.com/BLAKE3-team/BLAKE3/blob/master/test_vectors/test_vectors.json
        let vectors: [(&[u8], &str); 2] = [
            // Empty string
            (&b""[..], EMPTY_INPUT_HEX),
            // Single byte
            (
                &[0u8][..],
                "2d3adedff11b61f14c886e35afa036736dcd87a74d27b5c1510225d0f592e213",
            ),
        ];

        for (input, expected_hex) in vectors {
            assert_eq!(hash_bytes(input).to_hex(), expected_hex);
        }
    }
}