1use crate::{Chunk, SizeParams};
2
3const PRIME: u64 = 153_191u64;
9const MASK: u64 = 0x00ff_ffff_ffffu64;
10const MIN_SIZE: usize = 16 * 1024; const AVG_SIZE: usize = 32 * 1024; const MAX_SIZE: usize = 64 * 1024; const FP_POLY: u64 = 0xbfe6_b8a5_bf37_8d83u64;
16
17const CUT_MASK: u64 = (AVG_SIZE - MIN_SIZE - 1) as u64;
21
22const WIN_SIZE: usize = 16; const WIN_MASK: usize = WIN_SIZE - 1;
25const WIN_SLIDE_OFFSET: usize = 64;
26const WIN_SLIDE_POS: usize = MIN_SIZE - WIN_SLIDE_OFFSET;
27
28pub struct Chunker<'a> {
29 buf: &'a [u8],
30 params: ChunkerParams, pos: usize,
32 len: usize,
33 sizes: SizeParams,
34 win_slide_pos: usize,
35}
36
37#[derive(Clone)]
39pub struct ChunkerParams {
40 poly_pow: u64, out_map: Vec<u64>, ir: Vec<u64>, }
44
45impl<'a> Chunker<'a> {
46 pub fn default_sizes() -> SizeParams {
47 SizeParams {
48 min: MIN_SIZE,
49 avg: AVG_SIZE,
50 max: MAX_SIZE,
51 }
52 }
53
54 pub fn new(buf: &'a [u8]) -> Chunker {
55 Chunker {
56 buf,
57 pos: 0,
58 params: ChunkerParams::new(),
59 len: buf.len(),
60 sizes: Self::default_sizes(),
61 win_slide_pos: WIN_SLIDE_POS,
62 }
63 }
64
65 pub fn with_params(buf: &'a [u8], params: ChunkerParams, sizes: SizeParams) -> Self {
66 debug_assert!(sizes.min > WIN_SLIDE_OFFSET);
67
68 Self {
69 buf,
70 params,
71 pos: 0,
72 len: buf.len(),
73 sizes,
74 win_slide_pos: sizes.min - WIN_SLIDE_OFFSET,
75 }
76 }
77
78 fn find_border(&mut self) -> Option<usize> {
79 if self.len == self.pos {
80 return None;
81 }
82
83 if self.len - self.pos < self.sizes.min {
84 let pos = self.pos;
85 self.pos = self.len;
86 return Some(self.len - pos);
87 }
88
89 self.pos += self.win_slide_pos;
90 let mut chunk_len = self.win_slide_pos;
91
92 let mut win = [0u8; WIN_SIZE];
93 let mut win_idx = 0;
94 let mut roll_hash = 0;
95
96 while self.pos < self.len {
97 let ch = self.buf[self.pos];
98 let out = win[win_idx] as usize;
99 let pushed_out = self.params.out_map[out];
100
101 roll_hash = (roll_hash * PRIME) & MASK;
103 roll_hash += u64::from(ch);
104 roll_hash = roll_hash.wrapping_sub(pushed_out) & MASK;
105
106 win[win_idx] = ch;
108 win_idx = (win_idx + 1) & WIN_MASK;
109
110 chunk_len += 1;
111 self.pos += 1;
112
113 if chunk_len >= self.sizes.min {
114 let checksum = roll_hash ^ self.params.ir[out];
115
116 if (checksum & CUT_MASK) == 0 || chunk_len >= self.sizes.max {
117 return Some(chunk_len);
118 }
119 }
120 }
121
122 Some(chunk_len)
123 }
124
125 pub fn give_params(self) -> ChunkerParams {
126 self.params
127 }
128}
129
130impl<'a> Iterator for Chunker<'a> {
131 type Item = Chunk;
132
133 fn next(&mut self) -> Option<Self::Item> {
134 let start = self.pos;
135
136 self.find_border().map(|length| Chunk::new(start, length))
137 }
138}
139
140impl ChunkerParams {
141 pub fn new() -> Self {
142 let mut cp = ChunkerParams::default();
143
144 for _ in 0..WIN_SIZE {
146 cp.poly_pow = (cp.poly_pow * PRIME) & MASK;
147 }
148
149 for i in 0..256 {
152 cp.out_map[i] = (i as u64 * cp.poly_pow) & MASK;
153
154 let (mut term, mut pow, mut val) = (1u64, 1u64, 1u64);
155 for _ in 0..WIN_SIZE {
156 if (term & FP_POLY) != 0 {
157 val += (pow * i as u64) & MASK;
158 }
159 pow = (pow * PRIME) & MASK;
160 term *= 2;
161 }
162 cp.ir[i] = val;
163 }
164
165 cp
166 }
167}
168
169impl Default for ChunkerParams {
170 fn default() -> Self {
171 let mut ret = ChunkerParams {
172 poly_pow: 1,
173 out_map: vec![0u64; 256],
174 ir: vec![0u64; 256],
175 };
176 ret.out_map.shrink_to_fit();
177 ret.ir.shrink_to_fit();
178 ret
179 }
180}
181
182#[cfg(test)]
183mod tests {
184 use crate::rabin::{Chunker, ChunkerParams};
185 use crate::SizeParams;
186
187 #[test]
188 fn rabin_works_with_different_sizes() {
189 let sizes = SizeParams::new(3000, 50000, 100000);
190
191 let data = vec![3; 1024 * 1024 * 300];
192
193 let chunker = Chunker::with_params(&data, ChunkerParams::default(), sizes);
194 for chunk in chunker {
195 println!("{:?}", chunk);
196 }
197 }
198}