1use sha2::{Digest, Sha256};
3
/// A piece of a larger byte stream together with its content hash.
///
/// The constructors in this module ([`Chunk::new`] and [`Chunker::chunk`])
/// always set `hash` to the hash of `data` and `size` to `data.len()`.
/// All fields are public, so code that mutates them directly is responsible
/// for keeping them consistent.
///
/// Equality compares all three fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Chunk {
    /// Content hash of `data`, formatted as `sha256:<hex digest>`.
    pub hash: String,
    /// The raw bytes of this chunk.
    pub data: Vec<u8>,
    /// Number of bytes in `data`.
    pub size: usize,
}
14
15impl Chunk {
16 #[must_use]
17 pub fn new(data: Vec<u8>) -> Self {
18 let hash = compute_hash(&data);
19 let size = data.len();
20 Self { hash, data, size }
21 }
22
23 #[must_use]
24 pub fn key(&self) -> String {
25 format!("_blob:chunk:{}", self.hash)
26 }
27}
28
/// Splits byte slices into fixed-size [`Chunk`]s.
///
/// The type is a thin, copyable wrapper around the configured chunk size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Chunker {
    /// Maximum number of bytes placed in a single chunk.
    chunk_size: usize,
}
33
34impl Chunker {
35 #[must_use]
36 pub const fn new(chunk_size: usize) -> Self {
37 Self { chunk_size }
38 }
39
40 #[must_use]
41 pub const fn chunk_size(&self) -> usize {
42 self.chunk_size
43 }
44
45 pub fn chunk<'a>(&'a self, data: &'a [u8]) -> impl Iterator<Item = Chunk> + 'a {
47 data.chunks(self.chunk_size).map(|chunk_data| {
48 let hash = compute_hash(chunk_data);
49 Chunk {
50 hash,
51 data: chunk_data.to_vec(),
52 size: chunk_data.len(),
53 }
54 })
55 }
56
57 #[must_use]
59 pub const fn chunk_count(&self, data_len: usize) -> usize {
60 if data_len == 0 {
61 0
62 } else {
63 data_len.div_ceil(self.chunk_size)
64 }
65 }
66}
67
68#[must_use]
70pub fn compute_hash(data: &[u8]) -> String {
71 let mut hasher = Sha256::new();
72 hasher.update(data);
73 let result = hasher.finalize();
74 format!("sha256:{result:x}")
75}
76
77#[must_use]
79pub fn compute_hash_streaming<'a>(segments: impl Iterator<Item = &'a [u8]>) -> String {
80 let mut hasher = Sha256::new();
81 for segment in segments {
82 hasher.update(segment);
83 }
84 let result = hasher.finalize();
85 format!("sha256:{result:x}")
86}
87
/// Incremental SHA-256 hasher for data that arrives in pieces.
///
/// Feed bytes with `update` and obtain the `sha256:`-prefixed hex digest
/// with `finalize`, which consumes the hasher.
pub struct StreamingHasher {
    /// Underlying SHA-256 state that accumulates the bytes fed so far.
    hasher: Sha256,
}
92
93impl Default for StreamingHasher {
94 fn default() -> Self {
95 Self::new()
96 }
97}
98
99impl StreamingHasher {
100 #[must_use]
101 pub fn new() -> Self {
102 Self {
103 hasher: Sha256::new(),
104 }
105 }
106
107 pub fn update(&mut self, data: &[u8]) {
108 self.hasher.update(data);
109 }
110
111 #[must_use]
112 pub fn finalize(self) -> String {
113 let result = self.hasher.finalize();
114 format!("sha256:{result:x}")
115 }
116}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// Published SHA-256 digest of the empty input, with this module's prefix.
    const EMPTY_SHA256: &str =
        "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";

    /// SHA-256 digest of the ASCII bytes `hello world`, with this module's prefix.
    const HELLO_WORLD_SHA256: &str =
        "sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9";

    /// Concatenates the payloads of `chunks` in order.
    fn reassemble(chunks: &[Chunk]) -> Vec<u8> {
        chunks.iter().flat_map(|c| c.data.iter().copied()).collect()
    }

    #[test]
    fn test_compute_hash() {
        let data = b"hello world";
        let hash = compute_hash(data);
        assert!(hash.starts_with("sha256:"));
        // "sha256:" prefix (7 bytes) followed by 64 hex digits.
        assert_eq!(hash.len(), 7 + 64);
        // Known-answer check so a wrong algorithm or format cannot pass.
        assert_eq!(hash, HELLO_WORLD_SHA256);
    }

    #[test]
    fn test_compute_hash_deterministic() {
        let data = b"test data";
        let hash1 = compute_hash(data);
        let hash2 = compute_hash(data);
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_compute_hash_different_data() {
        let hash1 = compute_hash(b"data1");
        let hash2 = compute_hash(b"data2");
        assert_ne!(hash1, hash2);
    }

    #[test]
    fn test_compute_hash_empty() {
        let hash = compute_hash(b"");
        assert!(hash.starts_with("sha256:"));
        assert_eq!(hash, EMPTY_SHA256);
    }

    #[test]
    fn test_chunker_single_chunk() {
        let chunker = Chunker::new(1024);
        let data = vec![0u8; 100];
        let chunks: Vec<_> = chunker.chunk(&data).collect();

        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].size, 100);
        assert_eq!(chunks[0].data, data);
        assert_eq!(chunks[0].hash, compute_hash(&data));
    }

    #[test]
    fn test_chunker_multiple_chunks() {
        let chunker = Chunker::new(100);
        // Distinct byte values so reordered or overlapping chunks are detected.
        let data: Vec<u8> = (0u8..250).collect();
        let chunks: Vec<_> = chunker.chunk(&data).collect();

        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].size, 100);
        assert_eq!(chunks[1].size, 100);
        assert_eq!(chunks[2].size, 50);
        assert_eq!(reassemble(&chunks), data);
        for chunk in &chunks {
            assert_eq!(chunk.size, chunk.data.len());
            assert_eq!(chunk.hash, compute_hash(&chunk.data));
        }
    }

    #[test]
    fn test_chunker_exact_multiple() {
        let chunker = Chunker::new(100);
        let data: Vec<u8> = (0u8..=255).cycle().take(300).collect();
        let chunks: Vec<_> = chunker.chunk(&data).collect();

        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].size, 100);
        assert_eq!(chunks[1].size, 100);
        assert_eq!(chunks[2].size, 100);
        assert_eq!(reassemble(&chunks), data);
    }

    #[test]
    fn test_chunker_empty_data() {
        let chunker = Chunker::new(100);
        let data: Vec<u8> = vec![];
        let chunks: Vec<_> = chunker.chunk(&data).collect();

        assert_eq!(chunks.len(), 0);
    }

    #[test]
    fn test_chunker_chunk_size_getter() {
        assert_eq!(Chunker::new(4096).chunk_size(), 4096);
    }

    #[test]
    #[should_panic]
    fn test_chunker_zero_chunk_size_panics() {
        // A zero chunk size cannot split anything; it must not be silently accepted.
        let chunker = Chunker::new(0);
        let _ = chunker.chunk(&[1u8, 2, 3]).count();
    }

    #[test]
    fn test_chunk_count() {
        let chunker = Chunker::new(100);
        assert_eq!(chunker.chunk_count(0), 0);
        assert_eq!(chunker.chunk_count(1), 1);
        assert_eq!(chunker.chunk_count(100), 1);
        assert_eq!(chunker.chunk_count(101), 2);
        assert_eq!(chunker.chunk_count(200), 2);
        assert_eq!(chunker.chunk_count(250), 3);
    }

    #[test]
    fn test_chunk_count_matches_chunk() {
        let chunker = Chunker::new(100);
        for len in [0usize, 1, 99, 100, 101, 250, 300] {
            let data = vec![0u8; len];
            assert_eq!(chunker.chunk(&data).count(), chunker.chunk_count(len));
        }
    }

    #[test]
    fn test_chunk_new() {
        let chunk = Chunk::new(vec![1, 2, 3]);
        assert_eq!(chunk.size, 3);
        assert_eq!(chunk.data, vec![1, 2, 3]);
        assert_eq!(chunk.hash, compute_hash(&[1, 2, 3]));
    }

    #[test]
    fn test_chunk_key() {
        let chunk = Chunk::new(vec![1, 2, 3]);
        assert!(chunk.key().starts_with("_blob:chunk:sha256:"));
        assert_eq!(chunk.key(), format!("_blob:chunk:{}", chunk.hash));
    }

    #[test]
    fn test_streaming_hasher() {
        let mut hasher = StreamingHasher::new();
        hasher.update(b"hello ");
        hasher.update(b"world");
        let hash = hasher.finalize();

        let direct_hash = compute_hash(b"hello world");
        assert_eq!(hash, direct_hash);
    }

    #[test]
    fn test_streaming_hasher_default_is_empty() {
        assert_eq!(StreamingHasher::default().finalize(), EMPTY_SHA256);
    }

    #[test]
    fn test_compute_hash_streaming() {
        let segments = vec![b"hello ".as_slice(), b"world".as_slice()];
        let hash = compute_hash_streaming(segments.into_iter());

        let direct_hash = compute_hash(b"hello world");
        assert_eq!(hash, direct_hash);
    }

    #[test]
    fn test_compute_hash_streaming_no_segments() {
        let hash = compute_hash_streaming(std::iter::empty());
        assert_eq!(hash, EMPTY_SHA256);
    }

    #[test]
    fn test_chunk_content_addressing() {
        let chunker = Chunker::new(100);

        let data1 = vec![42u8; 100];
        // Same content held in a separate buffer must hash identically.
        let data2 = vec![42u8; 100];

        let chunks1: Vec<_> = chunker.chunk(&data1).collect();
        let chunks2: Vec<_> = chunker.chunk(&data2).collect();

        assert_eq!(chunks1[0].hash, chunks2[0].hash);
    }

    #[test]
    fn test_chunk_different_content() {
        let chunker = Chunker::new(100);

        let data1 = vec![1u8; 100];
        let data2 = vec![2u8; 100];

        let chunks1: Vec<_> = chunker.chunk(&data1).collect();
        let chunks2: Vec<_> = chunker.chunk(&data2).collect();

        assert_ne!(chunks1[0].hash, chunks2[0].hash);
    }
}