shadow_storage/
chunking.rs1use shadow_core::error::{Result, ShadowError};
4use bytes::Bytes;
5use crypto::hash;
6
/// Metadata describing a single chunk of a larger byte buffer.
///
/// Values are compared field by field, so two infos are equal only when
/// index, hash and size all match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkInfo {
    /// Zero-based position of the chunk within the original data.
    pub index: usize,
    /// 32-byte digest of the chunk's bytes, as returned by `hash::hash_data`.
    pub hash: [u8; 32],
    /// Length of the chunk in bytes.
    pub size: usize,
}
17
/// Splits byte buffers into fixed-size chunks and reassembles them.
#[derive(Debug, Clone)]
pub struct Chunker {
    /// Size in bytes of every chunk except possibly the last one.
    chunk_size: usize,
    /// Lower size bound derived from `chunk_size` by `Chunker::new`.
    /// Not read by any method in this file.
    min_size: usize,
    /// Upper size bound derived from `chunk_size` by `Chunker::new`.
    /// Not read by any method in this file.
    max_size: usize,
}
27
28impl Chunker {
29 pub fn new(chunk_size: usize) -> Self {
31 Self {
32 chunk_size,
33 min_size: chunk_size / 4,
34 max_size: chunk_size * 4,
35 }
36 }
37
38 pub fn chunk(&self, data: &[u8]) -> Result<Vec<(ChunkInfo, Bytes)>> {
40 let mut chunks = Vec::new();
41 let mut offset = 0;
42 let mut index = 0;
43
44 while offset < data.len() {
45 let remaining = data.len() - offset;
46 let chunk_size = remaining.min(self.chunk_size);
47
48 let chunk_data = &data[offset..offset + chunk_size];
49 let chunk_hash = *hash::hash_data(chunk_data).as_bytes();
50
51 let info = ChunkInfo {
52 index,
53 hash: chunk_hash,
54 size: chunk_size,
55 };
56
57 chunks.push((info, Bytes::copy_from_slice(chunk_data)));
58
59 offset += chunk_size;
60 index += 1;
61 }
62
63 Ok(chunks)
64 }
65
66 pub fn reassemble(&self, chunks: &[(ChunkInfo, Bytes)]) -> Result<Bytes> {
68 let mut sorted = chunks.to_vec();
70 sorted.sort_by_key(|(info, _)| info.index);
71
72 let mut result = Vec::new();
74
75 for (info, chunk_data) in sorted {
76 let computed_hash = *hash::hash_data(&chunk_data).as_bytes();
78 if computed_hash != info.hash {
79 return Err(ShadowError::Storage(format!(
80 "Chunk {} hash mismatch", info.index
81 )));
82 }
83
84 result.extend_from_slice(&chunk_data);
85 }
86
87 Ok(Bytes::from(result))
88 }
89
90 pub fn content_hash(chunks: &[ChunkInfo]) -> [u8; 32] {
92 let mut combined = Vec::new();
93 for chunk in chunks {
94 combined.extend_from_slice(&chunk.hash);
95 }
96 *hash::hash_data(&combined).as_bytes()
97 }
98}
99
100impl Default for Chunker {
101 fn default() -> Self {
102 Self::new(256 * 1024) }
104}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `len` bytes following a repeating 0..=250 pattern so that
    /// neighbouring chunks have different contents (and therefore hashes).
    fn patterned(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i % 251) as u8).collect()
    }

    #[test]
    fn test_chunking() {
        let chunker = Chunker::new(100);
        let data = vec![1u8; 250];

        let chunks = chunker.chunk(&data).unwrap();

        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].0.size, 100);
        assert_eq!(chunks[1].0.size, 100);
        assert_eq!(chunks[2].0.size, 50);

        // Indices are sequential and each payload matches its recorded size.
        for (expected_index, (info, bytes)) in chunks.iter().enumerate() {
            assert_eq!(info.index, expected_index);
            assert_eq!(bytes.len(), info.size);
        }
    }

    #[test]
    fn test_chunking_empty_input() {
        let chunker = Chunker::new(100);

        let chunks = chunker.chunk(&[]).unwrap();

        assert!(chunks.is_empty());
    }

    #[test]
    fn test_reassembly() {
        let chunker = Chunker::new(100);
        let original = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10];

        let chunks = chunker.chunk(&original).unwrap();
        let reassembled = chunker.reassemble(&chunks).unwrap();

        assert_eq!(reassembled.as_ref(), original.as_slice());
    }

    #[test]
    fn test_reassembly_out_of_order() {
        let chunker = Chunker::new(100);
        let original = patterned(250);

        // Reverse the chunk list to prove reassembly orders by index.
        let mut chunks = chunker.chunk(&original).unwrap();
        assert_eq!(chunks.len(), 3);
        chunks.reverse();

        let reassembled = chunker.reassemble(&chunks).unwrap();

        assert_eq!(reassembled.as_ref(), original.as_slice());
    }

    #[test]
    fn test_reassembly_detects_hash_mismatch() {
        let chunker = Chunker::new(100);
        let original = patterned(250);

        let mut chunks = chunker.chunk(&original).unwrap();
        // Corrupt the recorded hash so it no longer matches the payload.
        chunks[1].0.hash[0] ^= 0xFF;

        assert!(chunker.reassemble(&chunks).is_err());
    }

    #[test]
    fn test_content_hash() {
        let chunker = Chunker::new(50);
        let data = patterned(100);

        let chunks = chunker.chunk(&data).unwrap();
        let chunk_infos: Vec<ChunkInfo> = chunks.iter().map(|(info, _)| info.clone()).collect();

        // Same input, same digest.
        let hash1 = Chunker::content_hash(&chunk_infos);
        let hash2 = Chunker::content_hash(&chunk_infos);
        assert_eq!(hash1, hash2);

        // The digest depends on the order of the chunk hashes.
        let mut reversed = chunk_infos.clone();
        reversed.reverse();
        assert_ne!(hash1, Chunker::content_hash(&reversed));
    }
}