cdc/
separator.rs

1use super::{Rabin64, RollingHash64};
2
3pub struct Separator {
4    pub index: u64,
5    pub hash: u64,
6}
7
8pub struct SeparatorIter<I, F> {
9    iter: I,
10    predicate: F,
11    rabin: Rabin64,
12    index: u64,
13}
14
15impl<I> SeparatorIter<I, fn(u64) -> bool>
16where
17    I: Iterator<Item = u8>,
18{
19    pub fn new(iter: I) -> SeparatorIter<I, fn(u64) -> bool> {
20        // window_size: 1 << 6 == 64 bytes
21        let separator_size_nb_bits = 6;
22
23        #[inline]
24        fn default_predicate(x: u64) -> bool {
25            const BITMASK: u64 = (1u64 << 13) - 1;
26            x & BITMASK == BITMASK
27        }
28
29        Self::custom_new(iter, separator_size_nb_bits, default_predicate)
30    }
31}
32
33impl<I, F> SeparatorIter<I, F>
34where
35    I: Iterator<Item = u8>,
36    F: Fn(u64) -> bool,
37{
38    pub fn custom_new(
39        mut iter: I,
40        separator_size_nb_bits: u32,
41        predicate: F,
42    ) -> SeparatorIter<I, F> {
43        let mut rabin = Rabin64::new(separator_size_nb_bits);
44        let index = rabin.reset_and_prefill_window(&mut iter) as u64;
45
46        SeparatorIter {
47            iter,
48            predicate,
49            rabin,
50            index,
51        }
52    }
53}
54
55impl<I, F> Iterator for SeparatorIter<I, F>
56where
57    I: Iterator<Item = u8>,
58    F: Fn(u64) -> bool,
59{
60    type Item = Separator;
61
62    #[inline]
63    fn next(&mut self) -> Option<Self::Item> {
64        while let Some(byte) = self.iter.next() {
65            self.rabin.slide(&byte);
66            self.index += 1;
67            if (self.predicate)(self.rabin.hash) {
68                let separator = Separator {
69                    index: self.index,
70                    hash: self.rabin.hash,
71                };
72
73                // Note: We skip subsequent separators which may overlap the current one.
74                self.index += self.rabin.reset_and_prefill_window(&mut self.iter) as u64;
75
76                return Some(separator);
77            }
78        }
79
80        None
81    }
82}
83
84// Converts a separator's hash to a level.
85pub struct HashToLevel {
86    lvl0_nb_bits: u32,
87    lvlup_nb_bits: u32,
88    lvlup_bitmask: u64,
89}
90
91impl HashToLevel {
92    pub fn new() -> HashToLevel {
93        Self::custom_new(13, 3)
94    }
95
96    pub fn custom_new(lvl0_nb_bits: u32, lvlup_nb_bits: u32) -> HashToLevel {
97        HashToLevel {
98            lvl0_nb_bits,
99            lvlup_nb_bits,
100            lvlup_bitmask: (1u64 << lvlup_nb_bits) - 1,
101        }
102    }
103
104    pub fn to_level(&self, hash: u64) -> usize {
105        let mut level = 0usize;
106        let mut h = hash >> self.lvl0_nb_bits;
107        while h & self.lvlup_bitmask == self.lvlup_bitmask {
108            level += 1;
109            h >>= self.lvlup_nb_bits;
110        }
111
112        level
113    }
114}
115
116#[cfg(test)]
117mod tests {
118    use super::*;
119
120    #[test]
121    fn hash_to_level() {
122        let converter = HashToLevel::custom_new(4, 2);
123
124        for n in 0..4 {
125            assert_eq!(converter.to_level((9u64 << n) - 1), 0);
126        }
127        for n in 4..6 {
128            assert_eq!(converter.to_level((9u64 << n) - 1), 0);
129        }
130        for n in 6..8 {
131            assert_eq!(converter.to_level((9u64 << n) - 1), 1);
132        }
133        for n in 8..10 {
134            assert_eq!(converter.to_level((9u64 << n) - 1), 2);
135        }
136        for n in 10..12 {
137            assert_eq!(converter.to_level((9u64 << n) - 1), 3);
138        }
139        for n in 12..14 {
140            assert_eq!(converter.to_level((9u64 << n) - 1), 4);
141        }
142    }
143}