cdc_chunkers/
seq.rs

1use crate::{Chunk, SizeParams};
2use std::cmp::Ordering;
3
/// Selects which byte-to-byte slope the chunker counts towards a border.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OperationMode {
    /// A step counts when a byte is greater than the byte before it.
    Increasing,
    /// A step counts when a byte is less than the byte before it.
    Decreasing,
}
9
/// Tuning parameters of the SeqCDC algorithm, as specified in the SeqCDC paper.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
    /// Number of steps in the selected direction that must accumulate, with no
    /// opposing step in between, before a chunk border is declared.
    sequence_length: usize,
    /// Number of opposing steps after which the scan jumps ahead.
    skip_trigger: usize,
    /// Number of bytes jumped over once `skip_trigger` is reached.
    skip_size: usize,
}
17
18pub struct Chunker<'a> {
19    buf: &'a [u8],
20    len: usize,
21    position: usize,
22    sizes: SizeParams,
23    mode: OperationMode,
24    sequence_length: usize,
25    skip_trigger: usize,
26    skip_size: usize,
27}
28
29impl Config {
30    pub fn new(sequence_length: usize, skip_trigger: usize, skip_size: usize) -> Self {
31        Self {
32            sequence_length,
33            skip_trigger,
34            skip_size,
35        }
36    }
37}
38
39impl Default for Config {
40    fn default() -> Self {
41        Self {
42            sequence_length: 5,
43            skip_trigger: 55,
44            skip_size: 256,
45        }
46    }
47}
48
49impl<'a> Chunker<'a> {
50    pub fn default_sizes() -> SizeParams {
51        SizeParams {
52            min: 4 * 1024,
53            avg: 8 * 1024,
54            max: 16 * 1024,
55        }
56    }
57
58    pub fn new(buf: &'a [u8], params: SizeParams, mode: OperationMode, config: Config) -> Self {
59        Self {
60            buf,
61            len: buf.len(),
62            position: 0,
63            sizes: params,
64            mode,
65            sequence_length: config.sequence_length,
66            skip_trigger: config.skip_trigger,
67            skip_size: config.skip_size,
68        }
69    }
70
71    fn find_border_increasing(&mut self) -> Option<usize> {
72        if self.position == self.len {
73            return None;
74        }
75
76        if self.len - self.position < self.sizes.min {
77            let delta = self.len - self.position;
78            self.position = self.len;
79            return Some(delta);
80        }
81
82        self.position += self.sizes.min;
83
84        let mut chunk_len = self.sizes.min;
85        let mut sequence_length = 0;
86        let mut opposing_slope_count = 0;
87
88        while self.position < self.len && chunk_len < self.sizes.max {
89            self.position += 1;
90            chunk_len += 1;
91
92            match self.buf[self.position - 1].cmp(&self.buf[self.position - 2]) {
93                Ordering::Less => {
94                    sequence_length = 0;
95                    opposing_slope_count += 1;
96                }
97                Ordering::Equal => continue,
98                Ordering::Greater => sequence_length += 1,
99            }
100
101            if sequence_length == self.sequence_length {
102                return Some(chunk_len);
103            }
104            if opposing_slope_count == self.skip_trigger {
105                self.position += self.skip_size;
106                chunk_len += self.skip_size;
107                opposing_slope_count = 0;
108            }
109        }
110
111        if self.position > self.len {
112            let delta = self.position - self.len;
113            self.position = self.len;
114            chunk_len -= delta;
115        }
116
117        Some(chunk_len)
118    }
119
120    fn find_border_decreasing(&mut self) -> Option<usize> {
121        if self.position == self.len {
122            return None;
123        }
124
125        if self.len - self.position < self.sizes.min {
126            let delta = self.len - self.position;
127            self.position = self.len;
128            return Some(delta);
129        }
130
131        self.position += self.sizes.min;
132
133        let mut chunk_len = self.sizes.min;
134        let mut sequence_length = 0;
135        let mut opposing_slope_count = 0;
136
137        while self.position < self.len && chunk_len < self.sizes.max {
138            self.position += 1;
139            chunk_len += 1;
140
141            match self.buf[self.position - 1].cmp(&self.buf[self.position - 2]) {
142                Ordering::Less => sequence_length += 1,
143                Ordering::Equal => continue,
144                Ordering::Greater => {
145                    sequence_length = 0;
146                    opposing_slope_count += 1
147                }
148            }
149
150            if sequence_length == self.sequence_length {
151                return Some(chunk_len);
152            }
153            if opposing_slope_count == self.skip_trigger {
154                self.position += self.skip_size;
155                chunk_len += self.skip_size;
156                opposing_slope_count = 0;
157            }
158        }
159
160        if self.position > self.len {
161            let delta = self.position - self.len;
162            self.position = self.len;
163            chunk_len -= delta;
164        }
165
166        Some(chunk_len)
167    }
168
169    /// Returns next size of the chunk.
170    ///
171    /// Reads the info about operation mode from the chunker instance.
172    fn find_border(&mut self) -> Option<usize> {
173        match self.mode {
174            OperationMode::Increasing => self.find_border_increasing(),
175            OperationMode::Decreasing => self.find_border_decreasing(),
176        }
177    }
178}
179
180impl Iterator for Chunker<'_> {
181    type Item = Chunk;
182
183    fn next(&mut self) -> Option<Self::Item> {
184        let start = self.position;
185
186        self.find_border().map(|length| Chunk::new(start, length))
187    }
188}