1use crate::{Chunk, ChunkIncr, ToChunkIncr};
2use std::fmt;
3use std::num::Wrapping;
4
5const BLOBBITS: u8 = 13;
6const BLOBSIZE: u32 = 1 << (BLOBBITS as u32);
7
8const WINDOW_BITS: u8 = 6;
9const WINDOW_SIZE: usize = 1 << (WINDOW_BITS as usize);
10
11const ROLLSUM_CHAR_OFFSET: usize = 31;
12
13#[derive(Debug, Clone, PartialEq, Eq)]
19pub struct RollSum {
20 window_len: usize,
21}
22
23impl ToChunkIncr for RollSum {
24 type Incr = RollSumIncr;
25
26 fn to_chunk_incr(&self) -> Self::Incr {
27 self.into()
28 }
29}
30
31impl RollSum {
32 pub fn with_window(window_len: usize) -> Self {
33 Self { window_len }
34 }
35}
36
37impl Chunk for RollSum {
38 type SearchState = RollSumSearchState;
39
40 fn to_search_state(&self) -> Self::SearchState {
41 self.into()
42 }
43
44 fn find_chunk_edge(
45 &self,
46 state: &mut Self::SearchState,
47 data: &[u8],
48 ) -> (Option<usize>, usize) {
49 for i in state.offset..data.len() {
50 let a = data[i];
51 let d = if i >= self.window_len {
52 data[i - self.window_len]
53 } else {
54 0
55 };
56
57 state.state.add(self.window_len, d, a);
58
59 if state.state.at_split() {
60 state.reset(self);
61 return (Some(i + 1), i + 1);
62 }
63 }
64
65 let discard_ct = data.len().saturating_sub(self.window_len);
67 state.offset = data.len() - discard_ct;
68 (None, discard_ct)
69 }
70}
71
72#[derive(Debug, Clone, PartialEq, Eq)]
73pub struct RollSumState {
74 s1: Wrapping<u32>,
79 s2: Wrapping<u32>,
80}
81
82impl From<&RollSum> for RollSumState {
83 fn from(s: &RollSum) -> Self {
84 let ws = Wrapping(s.window_len as u32);
85 Self {
90 s1: ws * Wrapping(ROLLSUM_CHAR_OFFSET as u32),
91 s2: ws * (ws - Wrapping(1)) * Wrapping(ROLLSUM_CHAR_OFFSET as u32),
92 }
93 }
94}
95
96impl RollSumState {
97 fn reset(&mut self, params: &RollSum) {
98 *self = params.into()
99 }
100}
101
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub struct RollSumSearchState {
104 state: RollSumState,
105 offset: usize,
106}
107
108impl From<&RollSum> for RollSumSearchState {
109 fn from(s: &RollSum) -> Self {
110 Self {
111 state: s.into(),
112 offset: 0,
113 }
114 }
115}
116
117impl RollSumSearchState {
118 fn reset(&mut self, params: &RollSum) {
119 self.offset = 0;
120 self.state.reset(params);
121 }
122}
123
124impl Default for RollSum {
125 fn default() -> Self {
126 Self::with_window(WINDOW_SIZE)
127 }
128}
129
130#[derive(Clone, PartialEq, Eq)]
136pub struct RollSumIncr {
137 state: RollSumState,
138
139 wofs: Wrapping<usize>,
141 window: Box<[u8]>,
142}
143
144impl fmt::Debug for RollSumIncr {
145 fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> Result<(), ::std::fmt::Error> {
146 f.debug_struct("RollSumIncr")
147 .field("state", &self.state)
148 .field("window", &::fmt_extra::Hs(&self.window[..]))
149 .field("wofs", &self.wofs)
150 .finish()
151 }
152}
153
154impl From<&RollSum> for RollSumIncr {
155 fn from(params: &RollSum) -> Self {
156 Self {
157 state: params.into(),
158 window: vec![0; params.window_len].into_boxed_slice(),
159 wofs: Wrapping(0),
160 }
161 }
162}
163
164impl Default for RollSumIncr {
165 fn default() -> Self {
166 (&RollSum::default()).into()
167 }
168}
169
170impl RollSumState {
171 fn add(&mut self, window_len: usize, drop: u8, add: u8) {
172 let d = Wrapping(drop as u32);
173 self.s1 += Wrapping(add as u32);
174 self.s1 -= d;
175 self.s2 += self.s1;
176 self.s2 -= Wrapping(window_len as u32) * (d + Wrapping(ROLLSUM_CHAR_OFFSET as u32));
177 }
178
179 fn digest(&self) -> u32 {
180 (self.s1.0 << 16) | (self.s2.0 & 0xffff)
181 }
182
183 fn at_split(&self) -> bool {
184 (self.digest() & (BLOBSIZE - 1)) == (BLOBSIZE - 1)
185 }
186}
187
188impl RollSumIncr {
189 pub fn digest(&self) -> u32 {
190 self.state.digest()
191 }
192
193 fn add(&mut self, drop: u8, add: u8) {
194 self.state.add(self.window.len(), drop, add);
195 }
196
197 pub fn roll_byte(&mut self, ch: u8) {
198 let w = self.window[self.wofs.0];
199 self.add(w, ch);
200 self.window[self.wofs.0] = ch;
201 self.wofs = Wrapping((self.wofs + Wrapping(1)).0 & (self.window.len() - 1));
202 }
203
204 #[cfg(test)]
205 pub(crate) fn roll(&mut self, data: &[u8]) {
206 for &i in data.iter() {
207 self.roll_byte(i);
208 }
209 }
210
211 pub fn at_split(&self) -> bool {
220 self.state.at_split()
221 }
222}
223
224impl ChunkIncr for RollSumIncr {
225 fn push(&mut self, data: &[u8]) -> Option<usize> {
226 for (i, &v) in data.iter().enumerate() {
227 self.roll_byte(v);
228 if self.at_split() {
229 return Some(i + 1);
230 }
231 }
232
233 None
234 }
235}
236
237#[cfg(test)]
238mod test {
239 use super::*;
240 use rand::RngCore;
241 use rollsum::Engine;
242
243 #[test]
244 fn rs() {
245 let mut m = RollSumIncr::default();
246 m.roll_byte(3);
247 assert_eq!(m.digest(), 130279491);
248 }
249
250 #[test]
251 fn compare_rollsum() {
252 let mut m1 = RollSumIncr::default();
253 let mut m2 = rollsum::Bup::default();
254
255 assert_eq!(m1.digest(), m2.digest());
256
257 m1.roll_byte(4);
258 m2.roll_byte(4);
259
260 assert_eq!(m1.digest(), m2.digest());
261
262 m1.roll_byte(18);
263 m2.roll_byte(18);
264
265 assert_eq!(m1.digest(), m2.digest());
266
267 let mut r = rand::thread_rng();
268 let mut b = [0u8; 2048];
269
270 r.fill_bytes(&mut b);
271
272 for (i, &v) in b.iter().enumerate() {
273 m1.roll_byte(v);
274 m2.roll_byte(v);
275 println!("i={}, v={}", i, v);
276 assert_eq!(m1.digest(), m2.digest());
277 }
278
279 m1.roll(&b);
280 m2.roll(&b);
281
282 assert_eq!(m1.digest(), m2.digest());
283 }
284
285 #[test]
286 fn compare_bup() {
287 use super::ChunkIncr;
288 let mut m1 = RollSumIncr::default();
289 let mut m2 = rollsum::Bup::default();
290
291 let mut r = rand::thread_rng();
292 let mut b = [0u8; 2048];
293
294 r.fill_bytes(&mut b);
295
296 let mut x = &b[..];
297 loop {
298 let v1 = m1.push(&x);
299 let v2 = m2.find_chunk_edge(&x);
300 assert_eq!(v1, v2);
301
302 match v1 {
303 None => break,
304 Some(v) => {
305 x = &x[v..];
306 }
307 }
308 }
309 }
310}