1use {RollingHash, CDC};
2use std::default::Default;
3use std::mem;
4
5pub const CHUNK_SIZE: u32 = 1 << CHUNK_BITS;
7
8pub const CHUNK_BITS: u32 = 13;
10
11
12pub struct Gear {
13 digest: u64,
14 pub chunk_bits: u32,
15}
16
17impl Default for Gear {
18 fn default() -> Self {
19 Gear {
20 digest: 0,
21 chunk_bits: CHUNK_BITS,
22 }
23 }
24}
25
26
27include!("_gear_rand.rs");
28
29impl RollingHash for Gear {
30 type Digest = u64;
31
32 fn roll_byte(&mut self, b: u8) {
33 self.digest = (self.digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
34 }
35
36 fn roll(&mut self, buf: &[u8]) {
38 let mut digest = self.digest;
39 buf.iter().map(
40 |&b| {
41 digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
42 }
43 ).count();
44 self.digest = digest;
45 }
46
47 fn digest(&self) -> u64 {
48 self.digest
49 }
50
51 fn reset(&mut self) {
52 *self = Gear {
53 chunk_bits: self.chunk_bits,
54 .. Default::default()
55 }
56 }
57}
58
59impl Gear {
60 pub fn new() -> Self {
62 Default::default()
63 }
64
65 pub fn new_with_chunk_bits(chunk_bits: u32) -> Self {
70 assert!(chunk_bits < 32);
71 Gear {
72 chunk_bits: chunk_bits,
73 ..Default::default()
74 }
75 }
76
77 pub fn find_chunk_edge_cond<'a, F>(&mut self, buf: &'a [u8], cond : F) -> Option<(&'a [u8], &'a [u8])>
78 where F : Fn(&Self) -> bool {
79 let mut digest = self.digest;
80
81 for (i, &b) in buf.iter().enumerate() {
82 digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
83
84 self.digest = digest;
85 if cond(self) {
86 self.reset();
87 return Some((&buf[..i+1], &buf[i+1..]));
88 }
89 }
90 self.digest = digest;
91 None
92 }
93
94
95 pub fn find_chunk_mask<'a>(&mut self, buf: &'a [u8], mask : u64) -> Option<(&'a [u8], &'a [u8])> {
96 let mut digest = self.digest;
97
98 for (i, &b) in buf.iter().enumerate() {
99 digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
100
101 if digest & mask == 0 {
102 self.reset();
103 return Some((&buf[..i+1], &buf[i+1..]));
104 }
105 }
106 self.digest = digest;
107 None
108 }
109
110
111}
112
113impl CDC for Gear {
114 fn find_chunk<'a>(&mut self, buf: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
118 const DIGEST_SIZE: usize = 64;
119 debug_assert_eq!(
120 mem::size_of::<<Self as RollingHash>::Digest>() * 8,
121 DIGEST_SIZE
122 );
123 let mask = !0u64 << (DIGEST_SIZE as u32 - self.chunk_bits);
124
125 let mut digest = self.digest;
126
127 for (i, &b) in buf.iter().enumerate() {
128 digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
129
130 if digest & mask == 0 {
131 self.reset();
132 return Some((&buf[..i+1], &buf[i+1..]));
133 }
134 }
135 self.digest = digest;
136 None
137 }
138
139}
140
141
142#[cfg(test)]
143mod tests {
144 use super::Gear;
145 use {RollingHash};
146
147 #[test]
148 fn effective_window_size() {
149 let ones = vec![0x1; 1024];
150 let zeroes = vec![0x0; 1024];
151
152 let mut gear = Gear::new();
153 gear.roll(&ones);
154 let digest = gear.digest();
155
156 let mut gear = Gear::new();
157 gear.roll(&zeroes);
158
159 for (i, &b) in ones.iter().enumerate() {
160 gear.roll_byte(b);
161 if gear.digest() == digest {
162 assert_eq!(i, 63);
163 return;
164 }
165 }
166
167 panic!("matching digest not found");
168 }
169
170 #[cfg(feature = "bench")]
171 mod bench {
172 use test::Bencher;
173 use super::*;
174 use CDC;
175
176 use tests::test_data_1mb;
177
178 #[bench]
179 fn perf_1mb_004k_chunks(b: &mut Bencher) {
180 let v = test_data_1mb();
181 b.bytes = v.len() as u64;
182
183 b.iter(|| {
184 let mut cdc = Gear::new_with_chunk_bits(12);
185 let mut buf = v.as_slice();
186
187 while let Some((_last, rest)) = cdc.find_chunk(buf) {
188 buf = rest;
189 }
190 });
191 }
192
193 #[bench]
194 fn perf_1mb_008k_chunks(b: &mut Bencher) {
195 let v = test_data_1mb();
196 b.bytes = v.len() as u64;
197
198 b.iter(|| {
199 let mut cdc = Gear::new_with_chunk_bits(13);
200 let mut buf = v.as_slice();
201
202 while let Some((_last, rest)) = cdc.find_chunk(buf) {
203 buf = rest;
204 }
205 });
206 }
207
208 #[bench]
209 fn perf_1mb_064k_chunks(b: &mut Bencher) {
210 let v = test_data_1mb();
211 b.bytes = v.len() as u64;
212
213 b.iter(|| {
214 let mut cdc = Gear::new_with_chunk_bits(16);
215 let mut buf = v.as_slice();
216
217 while let Some((_last, rest)) = cdc.find_chunk(buf) {
218 buf = rest;
219 }
220 });
221 }
222
223
224 }
225}