rdedup_cdc/
gear.rs

1use {RollingHash, CDC};
2use std::default::Default;
3use std::mem;
4
5/// Default chunk size used by `gear`
6pub const CHUNK_SIZE: u32 = 1 << CHUNK_BITS;
7
8/// Default chunk size used by `gear` (log2)
9pub const CHUNK_BITS: u32 = 13;
10
11
12pub struct Gear {
13    digest: u64,
14    pub chunk_bits: u32,
15}
16
17impl Default for Gear {
18    fn default() -> Self {
19        Gear {
20            digest: 0,
21            chunk_bits: CHUNK_BITS,
22        }
23    }
24}
25
26
27include!("_gear_rand.rs");
28
29impl RollingHash for Gear {
30    type Digest = u64;
31
32    fn roll_byte(&mut self, b: u8) {
33        self.digest = (self.digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
34    }
35
36    // due to rustc failing to optimize
37    fn roll(&mut self, buf: &[u8]) {
38        let mut digest = self.digest;
39        buf.iter().map(
40            |&b| {
41                digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
42            }
43            ).count();
44        self.digest = digest;
45    }
46
47    fn digest(&self) -> u64 {
48        self.digest
49    }
50
51    fn reset(&mut self) {
52        *self = Gear {
53            chunk_bits: self.chunk_bits,
54            .. Default::default()
55        }
56    }
57}
58
59impl Gear {
60    /// Create new Gear engine with default chunking settings
61    pub fn new() -> Self {
62        Default::default()
63    }
64
65    /// Create new Gear engine with custom chunking settings
66    ///
67    /// `chunk_bits` is number of bits that need to match in
68    /// the edge condition. `CHUNK_BITS` constant is the default.
69    pub fn new_with_chunk_bits(chunk_bits: u32) -> Self {
70        assert!(chunk_bits < 32);
71        Gear {
72            chunk_bits: chunk_bits,
73            ..Default::default()
74        }
75    }
76
77    pub fn find_chunk_edge_cond<'a, F>(&mut self, buf: &'a [u8], cond : F) -> Option<(&'a [u8], &'a [u8])>
78        where F : Fn(&Self) -> bool {
79            let mut digest = self.digest;
80
81            for (i, &b) in buf.iter().enumerate() {
82                digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
83
84                self.digest = digest;
85                if cond(self) {
86                    self.reset();
87                    return Some((&buf[..i+1], &buf[i+1..]));
88                }
89            }
90            self.digest = digest;
91            None
92        }
93
94
95    pub fn find_chunk_mask<'a>(&mut self, buf: &'a [u8], mask : u64) -> Option<(&'a [u8], &'a [u8])> {
96            let mut digest = self.digest;
97
98            for (i, &b) in buf.iter().enumerate() {
99                digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
100
101                if digest & mask == 0 {
102                    self.reset();
103                    return Some((&buf[..i+1], &buf[i+1..]));
104                }
105            }
106            self.digest = digest;
107            None
108        }
109
110
111}
112
113impl CDC for Gear {
114    /// Find chunk edge using Gear defaults.
115    ///
116    /// See `Engine::find_chunk_edge_cond`.
117    fn find_chunk<'a>(&mut self, buf: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
118        const DIGEST_SIZE: usize = 64;
119        debug_assert_eq!(
120            mem::size_of::<<Self as RollingHash>::Digest>() * 8,
121            DIGEST_SIZE
122            );
123        let mask = !0u64 << (DIGEST_SIZE as u32 - self.chunk_bits);
124
125        let mut digest = self.digest;
126
127        for (i, &b) in buf.iter().enumerate() {
128            digest = (digest << 1).wrapping_add(unsafe { *G.get_unchecked(b as usize) });
129
130            if digest & mask == 0 {
131                self.reset();
132                return Some((&buf[..i+1], &buf[i+1..]));
133            }
134        }
135        self.digest = digest;
136        None
137    }
138
139}
140
141
142#[cfg(test)]
143mod tests {
144    use super::Gear;
145    use {RollingHash};
146
147    #[test]
148    fn effective_window_size() {
149        let ones = vec![0x1; 1024];
150        let zeroes = vec![0x0; 1024];
151
152        let mut gear = Gear::new();
153        gear.roll(&ones);
154        let digest = gear.digest();
155
156        let mut gear = Gear::new();
157        gear.roll(&zeroes);
158
159        for (i, &b) in ones.iter().enumerate() {
160            gear.roll_byte(b);
161            if gear.digest() == digest {
162                assert_eq!(i, 63);
163                return;
164            }
165        }
166
167        panic!("matching digest not found");
168    }
169
170    #[cfg(feature = "bench")]
171    mod bench {
172        use test::Bencher;
173        use super::*;
174        use CDC;
175
176        use tests::test_data_1mb;
177
178        #[bench]
179        fn perf_1mb_004k_chunks(b: &mut Bencher) {
180            let v = test_data_1mb();
181            b.bytes = v.len() as u64;
182
183            b.iter(|| {
184                let mut cdc = Gear::new_with_chunk_bits(12);
185                let mut buf = v.as_slice();
186
187                while let Some((_last, rest)) = cdc.find_chunk(buf) {
188                    buf = rest;
189                }
190            });
191        }
192
193        #[bench]
194        fn perf_1mb_008k_chunks(b: &mut Bencher) {
195            let v = test_data_1mb();
196            b.bytes = v.len() as u64;
197
198            b.iter(|| {
199                let mut cdc = Gear::new_with_chunk_bits(13);
200                let mut buf = v.as_slice();
201
202                while let Some((_last, rest)) = cdc.find_chunk(buf) {
203                    buf = rest;
204                }
205            });
206        }
207
208        #[bench]
209        fn perf_1mb_064k_chunks(b: &mut Bencher) {
210            let v = test_data_1mb();
211            b.bytes = v.len() as u64;
212
213            b.iter(|| {
214                let mut cdc = Gear::new_with_chunk_bits(16);
215                let mut buf = v.as_slice();
216
217                while let Some((_last, rest)) = cdc.find_chunk(buf) {
218                    buf = rest;
219                }
220            });
221        }
222
223
224    }
225}