Skip to main content

pacha/storage/
content_address.rs

1//! Content addressing using BLAKE3 hashing.
2
3use serde::{Deserialize, Serialize};
4use std::fmt;
5use std::io::Read;
6
/// Compression algorithm used for stored content.
///
/// Serialized by serde under the lowercase variant name
/// (`rename_all = "lowercase"`); the default value is [`Compression::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum Compression {
    /// No compression.
    #[default]
    None,
    /// Zstandard compression.
    ///
    /// This variant only exists when the `compression` feature is enabled.
    #[cfg(feature = "compression")]
    Zstd,
}
18
19impl fmt::Display for Compression {
20    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21        match self {
22            Self::None => write!(f, "none"),
23            #[cfg(feature = "compression")]
24            Self::Zstd => write!(f, "zstd"),
25        }
26    }
27}
28
/// Content-addressed identifier for stored artifacts.
///
/// Uses BLAKE3 hashing for:
/// - Deduplication across versions
/// - Tamper detection
/// - Efficient delta storage
///
/// All fields are private; read them through the accessor methods on this type.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ContentAddress {
    /// BLAKE3 hash of content (32 bytes).
    hash: [u8; 32],
    /// Content size in bytes (uncompressed).
    size: u64,
    /// Compression algorithm used.
    compression: Compression,
}
44
45impl ContentAddress {
46    /// Create a new content address from raw components.
47    #[must_use]
48    pub fn new(hash: [u8; 32], size: u64, compression: Compression) -> Self {
49        Self { hash, size, compression }
50    }
51
52    /// Compute content address from bytes.
53    #[must_use]
54    pub fn from_bytes(data: &[u8]) -> Self {
55        let hash = blake3::hash(data);
56        Self { hash: *hash.as_bytes(), size: data.len() as u64, compression: Compression::None }
57    }
58
59    /// Compute content address from a reader.
60    ///
61    /// # Errors
62    ///
63    /// Returns an error if reading fails.
64    pub fn from_reader<R: Read>(mut reader: R) -> std::io::Result<Self> {
65        let mut hasher = blake3::Hasher::new();
66        let mut size = 0u64;
67        let mut buffer = [0u8; 8192];
68
69        loop {
70            let bytes_read = reader.read(&mut buffer)?;
71            if bytes_read == 0 {
72                break;
73            }
74            hasher.update(&buffer[..bytes_read]);
75            size += bytes_read as u64;
76        }
77
78        let hash = hasher.finalize();
79        Ok(Self { hash: *hash.as_bytes(), size, compression: Compression::None })
80    }
81
82    /// Get the hash as bytes.
83    #[must_use]
84    pub fn hash_bytes(&self) -> &[u8; 32] {
85        &self.hash
86    }
87
88    /// Get the hash as a hex string.
89    #[must_use]
90    pub fn hash_hex(&self) -> String {
91        hex::encode(&self.hash)
92    }
93
94    /// Get the content size in bytes.
95    #[must_use]
96    pub fn size(&self) -> u64 {
97        self.size
98    }
99
100    /// Get the compression algorithm.
101    #[must_use]
102    pub fn compression(&self) -> Compression {
103        self.compression
104    }
105
106    /// Set the compression algorithm (returns new instance).
107    #[must_use]
108    pub fn with_compression(mut self, compression: Compression) -> Self {
109        self.compression = compression;
110        self
111    }
112
113    /// Get the storage path prefix (first 2 hex chars for sharding).
114    #[must_use]
115    pub fn storage_prefix(&self) -> String {
116        hex::encode(&self.hash[..1])
117    }
118
119    /// Get the full storage path (`prefix/full_hash`).
120    #[must_use]
121    pub fn storage_path(&self) -> String {
122        format!("{}/{}", self.storage_prefix(), self.hash_hex())
123    }
124
125    /// Verify that data matches this content address.
126    #[must_use]
127    pub fn verify(&self, data: &[u8]) -> bool {
128        let computed = Self::from_bytes(data);
129        self.hash == computed.hash
130    }
131}
132
133impl fmt::Display for ContentAddress {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        write!(f, "blake3:{}:{}:{}", self.hash_hex(), self.size, self.compression)
136    }
137}
138
// Local lowercase hex encoder used for rendering hashes.
mod hex {
    /// Lookup table mapping a nibble (0..=15) to its lowercase hex digit.
    const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";

    /// Encode `bytes` as lowercase hex, two characters per input byte
    /// (high nibble first).
    pub(super) fn encode(bytes: &[u8]) -> String {
        let mut encoded = String::with_capacity(bytes.len() * 2);
        encoded.extend(
            bytes
                .iter()
                .flat_map(|&byte| [byte >> 4, byte & 0x0f])
                .map(|nibble| char::from(HEX_CHARS[usize::from(nibble)])),
        );
        encoded
    }
}
152
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Fixture input shared by the fixed-vector tests.
    const HELLO: &[u8] = b"hello world";
    /// BLAKE3 hash of "hello world", as lowercase hex.
    const HELLO_HEX: &str = "d74981efa70a0c880b8d8c1985d075dbcbf679b99a5f9914e5aaf96b831a9e24";

    #[test]
    fn test_content_address_from_bytes() {
        let address = ContentAddress::from_bytes(HELLO);

        assert_eq!(address.size(), 11);
        assert_eq!(address.compression(), Compression::None);
        assert_eq!(address.hash_hex(), HELLO_HEX);
    }

    #[test]
    fn test_content_address_from_reader() {
        let source = std::io::Cursor::new(HELLO);
        let address = ContentAddress::from_reader(source).unwrap();

        assert_eq!(address.size(), 11);
        assert_eq!(address.hash_hex(), HELLO_HEX);
    }

    #[test]
    fn test_content_address_verify() {
        let address = ContentAddress::from_bytes(HELLO);

        // The original bytes match; an appended byte or a case change does not.
        assert!(address.verify(HELLO));
        assert!(!address.verify(b"hello world!"));
        assert!(!address.verify(b"Hello world"));
    }

    #[test]
    fn test_storage_path() {
        let address = ContentAddress::from_bytes(HELLO);
        let path = address.storage_path();

        // First byte is 0xd7, so prefix is "d7"
        assert_eq!(address.storage_prefix(), "d7");
        assert!(path.starts_with("d7/"));
        assert!(path.ends_with(&address.hash_hex()));
    }

    #[test]
    fn test_display() {
        let rendered = ContentAddress::from_bytes(b"test").to_string();

        assert!(rendered.starts_with("blake3:"));
        assert!(rendered.contains(":4:none"));
    }

    #[test]
    fn test_with_compression() {
        let plain = ContentAddress::from_bytes(b"data");
        assert_eq!(plain.compression(), Compression::None);

        // The Zstd variant only exists with the `compression` feature.
        #[cfg(feature = "compression")]
        {
            let compressed = plain.with_compression(Compression::Zstd);
            assert_eq!(compressed.compression(), Compression::Zstd);
        }
    }

    #[test]
    fn test_serialization() {
        let original = ContentAddress::from_bytes(b"test data");
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ContentAddress = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }

    // Property-based tests
    proptest! {
        #[test]
        fn prop_content_address_deterministic(payload: Vec<u8>) {
            let first = ContentAddress::from_bytes(&payload);
            let second = ContentAddress::from_bytes(&payload);
            prop_assert_eq!(first, second);
        }

        #[test]
        fn prop_content_address_size_matches(payload: Vec<u8>) {
            let address = ContentAddress::from_bytes(&payload);
            prop_assert_eq!(address.size(), payload.len() as u64);
        }

        #[test]
        fn prop_content_address_verify_self(payload: Vec<u8>) {
            let address = ContentAddress::from_bytes(&payload);
            prop_assert!(address.verify(&payload));
        }

        #[test]
        fn prop_different_data_different_hash(left: Vec<u8>, right: Vec<u8>) {
            prop_assume!(left != right);
            let left_addr = ContentAddress::from_bytes(&left);
            let right_addr = ContentAddress::from_bytes(&right);
            prop_assert_ne!(left_addr.hash_bytes(), right_addr.hash_bytes());
        }

        #[test]
        fn prop_hash_hex_length(payload: Vec<u8>) {
            let address = ContentAddress::from_bytes(&payload);
            prop_assert_eq!(address.hash_hex().len(), 64); // 32 bytes = 64 hex chars
        }

        #[test]
        fn prop_storage_prefix_length(payload: Vec<u8>) {
            let address = ContentAddress::from_bytes(&payload);
            prop_assert_eq!(address.storage_prefix().len(), 2); // 1 byte = 2 hex chars
        }
    }
}
279
280// ─── Kani Formal Verification ────────────────────────────────────────────
281
// Proof harnesses for the Kani model checker; compiled only under `cfg(kani)`.
#[cfg(kani)]
mod kani_proofs {
    use super::*;

    /// For any `u64` size, `ContentAddress::new` followed by `size()` returns
    /// the same value.
    #[kani::proof]
    fn verify_content_address_size_invariant() {
        let size: u64 = kani::any();
        let hash = [0u8; 32];
        let addr = ContentAddress::new(hash, size, Compression::None);
        assert!(addr.size() == size);
    }

    /// For any 8-byte input, the hash exposed by `hash_bytes()` is 32 bytes long.
    #[kani::proof]
    fn verify_hash_bytes_length() {
        let data: [u8; 8] = kani::any();
        let addr = ContentAddress::from_bytes(&data);
        assert!(addr.hash_bytes().len() == 32);
    }

    /// For any 4-byte input, the hex encoder emits exactly two characters per byte.
    #[kani::proof]
    fn verify_hex_encode_length() {
        let bytes: [u8; 4] = kani::any();
        let encoded = super::hex::encode(&bytes);
        assert!(encoded.len() == 8); // 4 bytes = 8 hex chars
    }
}
307}