Skip to main content

hash_roll/
gear.rs

1#![cfg(feature = "gear")]
2
3use crate::{Chunk, ChunkIncr, ToChunkIncr};
4use std::fmt;
5use std::num::Wrapping;
6
7/// Gear Content Defined Chunking using 32bit expansion.
8///
9/// Reference:
10///
11///  Xia, W., Jiang, H., Feng, D., Tian, L., Fu, M., and Zhou, Y. Ddelta: A dedulication-inspired
12///  fast delta compression approach. Performance Evaluation 79 (2014), 258-271.
13///
14///  http://wxia.hustbackup.cn/pub/DElta-PEVA-2014.pdf
15#[derive(Clone)]
16pub struct Gear32<'a> {
17    /// A mask with an appropriate number of bits set for the desired average chunk size.
18    ///
19    /// fixed configuration.
20    mask: u32,
21
22    /// value to match (fp & mask) against.
23    ///
24    /// fixed configuration.
25    xxx: u32,
26
27    /// A table to map bytes to 32bit values
28    ///
29    /// fixed configuration.
30    gear: &'a [u32; 256],
31}
32
33#[derive(Debug, Default, PartialEq, Eq, Clone)]
34pub struct GearState32 {
35    /// current fingerprint/hash
36    ///
37    /// varying state.
38    fp: Wrapping<u32>,
39}
40
41#[derive(Debug, Clone)]
42pub struct GearIncr32<'a> {
43    params: Gear32<'a>,
44
45    state: GearState32,
46}
47
48impl<'a> Chunk for Gear32<'a> {
49    type SearchState = GearState32;
50
51    fn to_search_state(&self) -> Self::SearchState {
52        Default::default()
53    }
54
55    fn find_chunk_edge(&self, state: &mut Self::SearchState, data: &[u8]) -> (Option<usize>, usize) {
56        for i in 0..data.len() {
57            if state.push(self, data[i]) {
58                *state = self.to_search_state();
59                return (Some(i + 1), i + 1);
60            }
61        }
62
63        (None, data.len())
64    }
65}
66
67impl<'a> ToChunkIncr for Gear32<'a> {
68    type Incr = GearIncr32<'a>;
69
70    fn to_chunk_incr(&self) -> Self::Incr {
71        self.into()
72    }
73}
74
75impl<'a> From<&Gear32<'a>> for GearIncr32<'a> {
76    fn from(params: &Gear32<'a>) -> Self {
77        Self {
78            params: params.clone(),
79            state: Default::default(),
80        }
81    } 
82}
83
84impl<'a> fmt::Debug for Gear32<'a> {
85    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
86        fmt.debug_struct("Gear32")
87            .field("mask", &self.mask)
88            .field("xxx", &self.xxx)
89            .field("gear", &&self.gear[..])
90            .finish()
91    }
92}
93
94impl GearState32 {
95    fn push(&mut self, params: &Gear32<'_>, add: u8) -> bool {
96        self.fp = (self.fp << 1) + Wrapping(params.gear[add as usize]);
97        self.fp.0 & params.mask == params.xxx
98    }
99
100    fn reset(&mut self) {
101        self.fp.0 = 0;
102    }
103}
104
105impl<'a> ChunkIncr for GearIncr32<'a> {
106    fn push(&mut self, data: &[u8]) -> Option<usize> {
107        for (i, v) in data.iter().enumerate() {
108            if self.state.push(&self.params, *v) {
109                self.state.reset();
110                return Some(i);
111            }
112        }
113
114        None
115    }
116}
117
118fn msb_mask(log2: usize) -> u32 {
119    // at least 1 bit & not all the bits
120    // FIXME: probably could relax those requirements with better math.
121    //debug_assert!(log2 > 0);
122    //debug_assert!(log2 < 32);
123
124    ((1 << log2) - 1) << (32 - log2)
125}
126
127impl<'a> Gear32<'a> {
128    /// Create a gear chunker which emits blocks with average size `(1<<average_size_log2)`, (or:
129    /// `2**average_size_log2`
130    pub fn with_average_size_log2(average_size_log2: usize) -> Self {
131        Gear32 {
132            mask: msb_mask(average_size_log2),
133            xxx: 0,
134            gear: &super::gear_table::GEAR_32,
135        }
136    }
137}
138
139impl<'a> Default for Gear32<'a> {
140    fn default() -> Self {
141        // 8KB average size
142        Self::with_average_size_log2(13)
143    }
144}
145
146#[cfg(test)]
147mod test {
148    #[test]
149    fn mm() {
150        use super::msb_mask;
151        assert_eq!(0b1 << 31, msb_mask(1));
152        assert_eq!(0b11 << 30, msb_mask(2));
153        assert_eq!(0b111 << 29, msb_mask(3));
154    }
155}