1#![allow(clippy::unused_io_amount)]
2pub use cdchunking;
3
4use cdchunking::ChunkerImpl;
5use std::cmp::min;
6use std::io::{Result as IoResult, Seek, Write};
7
8const TABLE: [u64; 256] = [
9 1553318008, 574654857, 759734804, 310648967, 1393527547, 1195718329, 694400241, 1154184075,
10 1319583805, 1298164590, 122602963, 989043992, 1918895050, 933636724, 1369634190, 1963341198,
11 1565176104, 1296753019, 1105746212, 1191982839, 1195494369, 29065008, 1635524067, 722221599,
12 1355059059, 564669751, 1620421856, 1100048288, 1018120624, 1087284781, 1723604070, 1415454125,
13 737834957, 1854265892, 1605418437, 1697446953, 973791659, 674750707, 1669838606, 320299026,
14 1130545851, 1725494449, 939321396, 748475270, 554975894, 1651665064, 1695413559, 671470969,
15 992078781, 1935142196, 1062778243, 1901125066, 1935811166, 1644847216, 744420649, 2068980838,
16 1988851904, 1263854878, 1979320293, 111370182, 817303588, 478553825, 694867320, 685227566,
17 345022554, 2095989693, 1770739427, 165413158, 1322704750, 46251975, 710520147, 700507188,
18 2104251000, 1350123687, 1593227923, 1756802846, 1179873910, 1629210470, 358373501, 807118919,
19 751426983, 172199468, 174707988, 1951167187, 1328704411, 2129871494, 1242495143, 1793093310,
20 1721521010, 306195915, 1609230749, 1992815783, 1790818204, 234528824, 551692332, 1930351755,
21 110996527, 378457918, 638641695, 743517326, 368806918, 1583529078, 1767199029, 182158924,
22 1114175764, 882553770, 552467890, 1366456705, 934589400, 1574008098, 1798094820, 1548210079,
23 821697741, 601807702, 332526858, 1693310695, 136360183, 1189114632, 506273277, 397438002,
24 620771032, 676183860, 1747529440, 909035644, 142389739, 1991534368, 272707803, 1905681287,
25 1210958911, 596176677, 1380009185, 1153270606, 1150188963, 1067903737, 1020928348, 978324723,
26 962376754, 1368724127, 1133797255, 1367747748, 1458212849, 537933020, 1295159285, 2104731913,
27 1647629177, 1691336604, 922114202, 170715530, 1608833393, 62657989, 1140989235, 381784875,
28 928003604, 449509021, 1057208185, 1239816707, 525522922, 476962140, 102897870, 132620570,
29 419788154, 2095057491, 1240747817, 1271689397, 973007445, 1380110056, 1021668229, 12064370,
30 1186917580, 1017163094, 597085928, 2018803520, 1795688603, 1722115921, 2015264326, 506263638,
31 1002517905, 1229603330, 1376031959, 763839898, 1970623926, 1109937345, 524780807, 1976131071,
32 905940439, 1313298413, 772929676, 1578848328, 1108240025, 577439381, 1293318580, 1512203375,
33 371003697, 308046041, 320070446, 1252546340, 568098497, 1341794814, 1922466690, 480833267,
34 1060838440, 969079660, 1836468543, 2049091118, 2023431210, 383830867, 2112679659, 231203270,
35 1551220541, 1377927987, 275637462, 2110145570, 1700335604, 738389040, 1688841319, 1506456297,
36 1243730675, 258043479, 599084776, 41093802, 792486733, 1897397356, 28077829, 1520357900,
37 361516586, 1119263216, 209458355, 45979201, 363681532, 477245280, 2107748241, 601938891,
38 244572459, 1689418013, 1141711990, 1485744349, 1181066840, 1950794776, 410494836, 1445347454,
39 2137242950, 852679640, 1014566730, 1999335993, 1871390758, 1736439305, 231222289, 603972436,
40 783045542, 370384393, 184356284, 709706295, 1453549767, 591603172, 768512391, 854125182,
41];
42
43const BLOCK_MIN_SIZE: usize = 1024 * 2; const BLOCK_AVG_SIZE: usize = 1024 * 8; const BLOCK_MAX_SIZE: usize = 1024 * 64; const MASK_SMALL: u64 = 0x0000d90303530000; const _MASK_AVERAGE: u64 = 0x0000d90303530000; const MASK_LARGE: u64 = 0x0000d90003530000; pub fn cut(buffer: &[u8]) -> usize {
58 let mut fp = 0;
59 let mut i = BLOCK_MIN_SIZE;
60 let mut n = buffer.len();
61 let mut avg_size = BLOCK_AVG_SIZE;
62
63 if n <= BLOCK_MIN_SIZE {
64 return n;
65 }
66
67 if n >= BLOCK_MAX_SIZE {
68 n = BLOCK_MAX_SIZE;
69 } else if n <= BLOCK_AVG_SIZE {
70 avg_size = n;
71 }
72
73 while i < avg_size {
74 fp = (fp << 1) + TABLE[buffer[i] as usize];
75 if fp & MASK_SMALL == 0 {
76 return i;
77 }
78 i += 1;
79 }
80
81 while i < n {
82 fp = (fp << 1) + TABLE[buffer[i] as usize];
83 if fp & MASK_LARGE == 0 {
84 return i;
85 }
86 i += 1;
87 }
88
89 i
90}
91
92const MAX_BUFFER_SIZE: usize = BLOCK_MAX_SIZE * 2;
93
94#[derive(Debug)]
95pub struct Chunker<W: Write + Seek> {
96 dst: W, buf: Vec<u8>, len: usize,
99}
100
101impl<W: Write + Seek> Chunker<W> {
102 pub fn new(dst: W) -> Self {
104 Self {
105 dst,
106 buf: vec![0u8; MAX_BUFFER_SIZE],
107 len: 0,
108 }
109 }
110
111 pub fn into_inner(&self) -> &W {
112 &self.dst
113 }
114
115 pub fn into_owned(self) -> W {
116 self.dst
117 }
118}
119
120fn cut_without_limit(buffer: &[u8]) -> Option<usize> {
121 let mut fp = 0;
122 let mut i = BLOCK_MIN_SIZE;
123 let mut n = buffer.len();
124
125 if n < BLOCK_MIN_SIZE {
126 return None;
127 }
128
129 if n >= BLOCK_MAX_SIZE {
130 n = BLOCK_MAX_SIZE;
131 }
132 while i < n {
137 fp = (fp << 1) + TABLE[buffer[i] as usize];
138 if fp & MASK_LARGE == 0 {
139 return Some(i);
140 }
141 i += 1;
142 }
143
144 Some(i)
145}
146
147impl<W: Write + Seek> Write for Chunker<W> {
148 fn write(&mut self, buffer: &[u8]) -> IoResult<usize> {
149 if buffer.is_empty() {
150 return Ok(0);
151 }
152
153 let buf = &mut self.buf;
154 let len = self.len;
155 let buffer_len = buffer.len();
156 let mut data_written = 0;
157 let mut pos = 0;
158
159 let mut in_len = min(MAX_BUFFER_SIZE - len, buffer_len);
161 assert!(in_len > 0);
162 buf[len..len + in_len].copy_from_slice(&buffer[..in_len]);
163 self.len += in_len;
164
165 while let Some(cut_pos) = cut_without_limit(&buf[pos..self.len]) {
167 data_written += self.dst.write(&buf[pos..pos + cut_pos])?;
168 pos += cut_pos;
169
170 let left_len = buf[pos..].len();
171 if in_len < buffer_len && left_len < BLOCK_MAX_SIZE {
172 buf.copy_within(pos..pos + left_len, 0);
174 self.len = left_len;
175
176 if in_len < buffer_len {
178 let len_to_copy = min(pos, buffer_len - in_len);
179 buf[left_len..left_len + len_to_copy]
180 .copy_from_slice(&buffer[in_len..in_len + len_to_copy]);
181 self.len += len_to_copy;
182 in_len += len_to_copy;
183 }
184
185 pos = 0;
187 }
188 }
189
190 assert!(buffer_len >= data_written);
191 self.len = buffer_len - data_written;
192 if self.len > 0 {
193 buf.copy_within(pos..pos + self.len, 0);
194 }
195
196 Ok(buffer_len)
197 }
198
199 fn flush(&mut self) -> IoResult<()> {
200 if self.len > 0 {
202 self.dst.write(&self.buf[0..self.len])?;
203 }
204
205 self.len = 0;
207
208 self.dst.flush()
210 }
211}
212
213pub struct FastCDC {}
214
215impl ChunkerImpl for FastCDC {
216 fn find_boundary(&mut self, data: &[u8]) -> Option<usize> {
217 Some(cut(data) - 1)
218 }
219
220 fn reset(&mut self) {}
221}
222
223#[cfg(test)]
224mod tests {
225 use super::*;
226 use fake::Fake;
227 use std::cell::RefCell;
228 use std::io::Cursor;
229
230 #[test]
231 fn cut_test() {
232 let buf: [u8; 3] = [0xb, 0xc, 0xff];
233 let i = cut(&buf);
234 assert_eq!(i, buf.len());
235 }
236
237 #[test]
238 fn cut_test2() {
239 let mut data: Vec<u8> = Vec::with_capacity(1075);
241 for _ in 0..1024 {
242 data.push((0..255).fake::<u8>())
243 }
244
245 let buf: Vec<u8> = Vec::with_capacity(1024);
247 let mut cursor = RefCell::new(Cursor::new(buf));
248
249 let mut chunker = Chunker::new(cursor.get_mut());
251 let size_written = chunker.write(&data).unwrap();
252 assert!(chunker.flush().is_ok()); assert_eq!(cursor.get_mut().get_ref(), &data); assert_eq!(data.len(), size_written); }
256}