1use super::{RollingHash, CDC};
2use std::default::Default;
3use std::{cmp, mem};
4use {gear, Gear};
5
6fn get_masks(avg_size: usize, nc_level: usize, seed: u64) -> (u64, u64) {
7 let bits = (avg_size.next_power_of_two() - 1).count_ones();
8 if bits == 13 {
9 return (0x0003590703530000, 0x0000d90003530000);
11 }
12 let mut mask = 0u64;
13 let mut v = seed;
14 let a = 6364136223846793005;
15 let c = 1442695040888963407;
16 while mask.count_ones() < bits - nc_level as u32 {
17 v = v.wrapping_mul(a).wrapping_add(c);
18 mask = (mask | 1).rotate_left(v as u32 & 0x3f);
19 }
20 let mask_long = mask;
21 while mask.count_ones() < bits + nc_level as u32 {
22 v = v.wrapping_mul(a).wrapping_add(c);
23 mask = (mask | 1).rotate_left(v as u32 & 0x3f);
24 }
25 let mask_short = mask;
26 (mask_short, mask_long)
27}
28
29pub struct FastCDC {
35 current_chunk_size: u64,
36 gear: Gear,
37 mask_short: u64,
38 mask_long: u64,
39 ignore_size: u64,
40 min_size: u64,
41 avg_size: u64,
42 max_size: u64,
43}
44
45impl Default for FastCDC {
46 fn default() -> Self {
47 FastCDC::new()
48 }
49}
50
51impl FastCDC {
52 pub fn new() -> Self {
54 FastCDC::new_with_chunk_bits(gear::CHUNK_BITS)
55 }
56
57 fn reset(&mut self) {
58 self.gear.reset();
59 self.current_chunk_size = 0;
60 }
61
62 pub fn new_with_chunk_bits(chunk_bits: u32) -> Self {
67 let (mask_short, mask_long) = get_masks(1 << chunk_bits, 2, 0);
68 let gear = Gear::new_with_chunk_bits(chunk_bits);
69 const DIGEST_SIZE: usize = 64;
70 debug_assert_eq!(
71 mem::size_of::<<Gear as RollingHash>::Digest>() * 8,
72 DIGEST_SIZE
73 );
74
75 const SPREAD_BITS: u32 = 3;
76 const WINDOW_SIZE: usize = 64;
77
78 let min_size = (1 << (gear.chunk_bits - SPREAD_BITS + 1)) as u64;
79
80 let ignore_size = min_size - WINDOW_SIZE as u64;
81 let avg_size = (1 << gear.chunk_bits) as u64;
82 let max_size = (1 << (gear.chunk_bits + SPREAD_BITS)) as u64;
83
84 Self {
85 current_chunk_size: 0,
86 gear: gear,
87 mask_short: mask_short,
88 mask_long: mask_long,
89 ignore_size: ignore_size,
90 min_size: min_size,
91 avg_size: avg_size,
92 max_size: max_size,
93 }
94 }
95}
96
97impl CDC for FastCDC {
98 fn find_chunk<'a>(&mut self, whole_buf: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
100 let mut left = whole_buf;
101
102 debug_assert!(self.current_chunk_size < self.max_size);
103
104 if self.current_chunk_size < self.ignore_size {
106 let skip_bytes = cmp::min(self.ignore_size - self.current_chunk_size, left.len() as u64);
107 self.current_chunk_size += skip_bytes;
108 left= &left[skip_bytes as usize..];
109 }
110
111 if self.current_chunk_size < self.min_size {
113 let roll_bytes = cmp::min(self.min_size - self.current_chunk_size, left.len() as u64);
114 self.gear.roll(&left[..roll_bytes as usize]);
115 self.current_chunk_size += roll_bytes;
116 left = &left [roll_bytes as usize..];
117 }
118
119 if self.current_chunk_size < self.avg_size {
121 let roll_bytes = cmp::min(self.avg_size - self.current_chunk_size, left.len() as u64);
122 let result = self.gear.find_chunk_mask(left, self.mask_short);
123
124 if let Some((_last, rest)) = result {
125 let result = (&whole_buf[..whole_buf.len() - rest.len()], rest);
126 self.reset();
127 return Some(result);
128 }
129
130 self.current_chunk_size += roll_bytes;
131 left = &left[roll_bytes as usize..];
132 }
133
134 if self.current_chunk_size < self.max_size {
136 let roll_bytes = cmp::min(self.max_size - self.current_chunk_size, left.len() as u64);
137 let result = self.gear.find_chunk_mask(left, self.mask_long);
138
139 if let Some((_last, rest)) = result {
140 let result = (&whole_buf[..whole_buf.len() - rest.len()], rest);
141 self.reset();
142 return Some(result);
143 }
144
145 self.current_chunk_size += roll_bytes;
146 left = &left[roll_bytes as usize..];
147 }
148
149 if self.current_chunk_size >= self.max_size {
150 debug_assert_eq!(self.current_chunk_size, self.max_size);
151 let result = (&whole_buf[..whole_buf.len() - left.len()], left);
152 self.reset();
153 return Some(result);
154 }
155
156 if left.is_empty() {
157 return None;
158 }
159 unreachable!();
160 }
161}
162
163#[cfg(test)]
164mod tests {
165
166 #[cfg(feature = "bench")]
167 mod bench {
168 use test::Bencher;
169 use super::*;
170
171 use tests::test_data_1mb;
172
173 use {CDC, FastCDC};
174
175 #[bench]
176 fn perf_1mb_004k_chunks(b: &mut Bencher) {
177 let v = test_data_1mb();
178 b.bytes = v.len() as u64;
179
180 b.iter(|| {
181 let mut cdc = FastCDC::new_with_chunk_bits(12);
182 let mut buf = v.as_slice();
183
184 while let Some((_last, rest)) = cdc.find_chunk(buf) {
185 buf = rest;
186 }
187 });
188 }
189
190 #[bench]
191 fn perf_1mb_008k_chunks(b: &mut Bencher) {
192 let v = test_data_1mb();
193 b.bytes = v.len() as u64;
194
195 b.iter(|| {
196 let mut cdc = FastCDC::new_with_chunk_bits(13);
197 let mut buf = v.as_slice();
198
199 while let Some((_last, rest)) = cdc.find_chunk(buf) {
200 buf = rest;
201 }
202 });
203 }
204
205 #[bench]
206 fn perf_1mb_064k_chunks(b: &mut Bencher) {
207 let v = test_data_1mb();
208 b.bytes = v.len() as u64;
209
210 b.iter(|| {
211 let mut cdc = FastCDC::new_with_chunk_bits(16);
212 let mut buf = v.as_slice();
213
214 while let Some((_last, rest)) = cdc.find_chunk(buf) {
215 buf = rest;
216 }
217 });
218 }
219
220 #[bench]
221 fn perf_1mb_128k_chunks(b: &mut Bencher) {
222 let v = test_data_1mb();
223 b.bytes = v.len() as u64;
224
225 b.iter(|| {
226 let mut cdc = FastCDC::new_with_chunk_bits(17);
227 let mut buf = v.as_slice();
228
229 while let Some((_last, rest)) = cdc.find_chunk(buf) {
230 buf = rest;
231 }
232 });
233 }
234 }
235}