/// Gear table for the rolling hash used by content-defined chunking.
///
/// Holds one `u32` per possible byte value; `alg_cdc_offset` indexes it with the
/// current input byte and adds the entry to its rolling pattern. Every entry is
/// below 2^31, so adding one to a pattern that has just been shifted right by one
/// bit always fits in a `u32`.
///
/// The values are fixed data: changing any of them changes where chunk boundaries fall.
const CDC_GEAR: [u32; 256] = [
    1553318008, 574654857, 759734804, 310648967, 1393527547, 1195718329, 694400241, 1154184075,
    1319583805, 1298164590, 122602963, 989043992, 1918895050, 933636724, 1369634190, 1963341198,
    1565176104, 1296753019, 1105746212, 1191982839, 1195494369, 29065008, 1635524067, 722221599,
    1355059059, 564669751, 1620421856, 1100048288, 1018120624, 1087284781, 1723604070, 1415454125,
    737834957, 1854265892, 1605418437, 1697446953, 973791659, 674750707, 1669838606, 320299026,
    1130545851, 1725494449, 939321396, 748475270, 554975894, 1651665064, 1695413559, 671470969,
    992078781, 1935142196, 1062778243, 1901125066, 1935811166, 1644847216, 744420649, 2068980838,
    1988851904, 1263854878, 1979320293, 111370182, 817303588, 478553825, 694867320, 685227566,
    345022554, 2095989693, 1770739427, 165413158, 1322704750, 46251975, 710520147, 700507188,
    2104251000, 1350123687, 1593227923, 1756802846, 1179873910, 1629210470, 358373501, 807118919,
    751426983, 172199468, 174707988, 1951167187, 1328704411, 2129871494, 1242495143, 1793093310,
    1721521010, 306195915, 1609230749, 1992815783, 1790818204, 234528824, 551692332, 1930351755,
    110996527, 378457918, 638641695, 743517326, 368806918, 1583529078, 1767199029, 182158924,
    1114175764, 882553770, 552467890, 1366456705, 934589400, 1574008098, 1798094820, 1548210079,
    821697741, 601807702, 332526858, 1693310695, 136360183, 1189114632, 506273277, 397438002,
    620771032, 676183860, 1747529440, 909035644, 142389739, 1991534368, 272707803, 1905681287,
    1210958911, 596176677, 1380009185, 1153270606, 1150188963, 1067903737, 1020928348, 978324723,
    962376754, 1368724127, 1133797255, 1367747748, 1458212849, 537933020, 1295159285, 2104731913,
    1647629177, 1691336604, 922114202, 170715530, 1608833393, 62657989, 1140989235, 381784875,
    928003604, 449509021, 1057208185, 1239816707, 525522922, 476962140, 102897870, 132620570,
    419788154, 2095057491, 1240747817, 1271689397, 973007445, 1380110056, 1021668229, 12064370,
    1186917580, 1017163094, 597085928, 2018803520, 1795688603, 1722115921, 2015264326, 506263638,
    1002517905, 1229603330, 1376031959, 763839898, 1970623926, 1109937345, 524780807, 1976131071,
    905940439, 1313298413, 772929676, 1578848328, 1108240025, 577439381, 1293318580, 1512203375,
    371003697, 308046041, 320070446, 1252546340, 568098497, 1341794814, 1922466690, 480833267,
    1060838440, 969079660, 1836468543, 2049091118, 2023431210, 383830867, 2112679659, 231203270,
    1551220541, 1377927987, 275637462, 2110145570, 1700335604, 738389040, 1688841319, 1506456297,
    1243730675, 258043479, 599084776, 41093802, 792486733, 1897397356, 28077829, 1520357900,
    361516586, 1119263216, 209458355, 45979201, 363681532, 477245280, 2107748241, 601938891,
    244572459, 1689418013, 1141711990, 1485744349, 1181066840, 1950794776, 410494836, 1445347454,
    2137242950, 852679640, 1014566730, 1999335993, 1871390758, 1736439305, 231222289, 603972436,
    783045542, 370384393, 184356284, 709706295, 1453549767, 591603172, 768512391, 854125182,
];
45
/// Average chunk size constant (1024).
///
/// NOTE(review): presumably the byte count callers pass as `avg_chunk_size` to
/// `alg_cdc_chunks` when chunking data; it is not referenced in this part of the
/// file, so confirm against the call sites.
pub(crate) const DATA_AVG_CHUNK_SIZE: u32 = 1024;
48
/// Derives the content-defined-chunking parameters for a target average chunk size.
///
/// Returns `(min_size, max_size, center_size, mask_s, mask_l)` where, with
/// `bits = round(log2(avg_size))`:
///
/// - `min_size` is `avg_size / 4`,
/// - `max_size` is `avg_size * 8` (saturating at `usize::MAX`),
/// - `center_size` is `avg_size - (min_size + ceil(min_size / 2))`,
/// - `mask_s` has its low `bits + 1` bits set,
/// - `mask_l` has its low `bits - 1` bits set.
///
/// Sizes are computed in `usize`, and masks wider than 32 bits saturate to
/// `u32::MAX`, so no value of `avg_size >= 2` overflows.
///
/// # Panics
///
/// Panics if `avg_size < 2`: `log2` is undefined for 0, and for 1 the `bits - 1`
/// mask width would be negative.
pub(crate) fn alg_cdc_params(avg_size: u32) -> (usize, usize, usize, u32, u32) {
    assert!(avg_size >= 2, "avg_size must be at least 2, got {avg_size}");

    // Widen first: `avg_size * 8` does not fit in u32 once avg_size reaches 2^29.
    let avg = avg_size as usize;
    let min_size = avg / 4;
    let max_size = avg.saturating_mul(8);
    let offset = min_size + min_size.div_ceil(2);
    // `offset` is roughly 3/8 of `avg`, so this subtraction cannot underflow.
    let center_size = avg - offset;

    // avg_size >= 2 guarantees bits >= 1, so `bits - 1` is safe.
    let bits = f64::from(avg_size).log2().round() as u32;
    let mask_s = low_bits_mask(bits + 1);
    let mask_l = low_bits_mask(bits - 1);

    (min_size, max_size, center_size, mask_s, mask_l)
}

/// Returns a `u32` with the lowest `bits` bits set; widths of 32 or more give `u32::MAX`.
fn low_bits_mask(bits: u32) -> u32 {
    1u32.checked_shl(bits).map_or(u32::MAX, |bit| bit - 1)
}
67
68pub(crate) fn alg_cdc_offset(
77 buffer: &[u8],
78 mi: usize,
79 ma: usize,
80 cs: usize,
81 mask_s: u32,
82 mask_l: u32,
83) -> usize {
84 let mut pattern: u32 = 0;
85 let size = buffer.len();
86 let mut i = mi.min(size);
87 let mut barrier = cs.min(size);
88
89 while i < barrier {
91 pattern = (pattern >> 1).wrapping_add(CDC_GEAR[buffer[i] as usize]);
92 if pattern & mask_s == 0 {
93 return i + 1;
94 }
95 i += 1;
96 }
97
98 barrier = ma.min(size);
100 while i < barrier {
101 pattern = (pattern >> 1).wrapping_add(CDC_GEAR[buffer[i] as usize]);
102 if pattern & mask_l == 0 {
103 return i + 1;
104 }
105 i += 1;
106 }
107
108 i
109}
110
111pub fn alg_cdc_chunks(data: &[u8], utf32: bool, avg_chunk_size: u32) -> Vec<&[u8]> {
117 if data.is_empty() {
118 return vec![&data[0..0]];
119 }
120
121 let (mi, ma, cs, mask_s, mask_l) = alg_cdc_params(avg_chunk_size);
122 let mut chunks = Vec::new();
123 let mut pos = 0;
124
125 while pos < data.len() {
126 let remaining = &data[pos..];
127 let mut cut_point = alg_cdc_offset(remaining, mi, ma, cs, mask_s, mask_l);
128
129 if utf32 {
131 cut_point -= cut_point % 4;
132 if cut_point == 0 {
133 cut_point = remaining.len().min(4);
134 }
135 }
136
137 chunks.push(&data[pos..pos + cut_point]);
138 pos += cut_point;
139 }
140
141 chunks
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Average chunk size used by every test in this module.
    const AVG: u32 = 1024;

    /// Joins chunks back into one contiguous byte vector.
    fn reassemble(chunks: &[&[u8]]) -> Vec<u8> {
        chunks.concat()
    }

    /// Produces `len` bytes by repeating the sequence `0..=255`.
    fn cycling_bytes(len: usize) -> Vec<u8> {
        (0..=255).cycle().take(len).collect()
    }

    /// The gear table has one entry per byte value.
    #[test]
    fn test_gear_table_length() {
        assert_eq!(CDC_GEAR.len(), 256);
    }

    /// Spot-checks both ends of the gear table.
    #[test]
    fn test_gear_table_first_last() {
        assert_eq!(CDC_GEAR[0], 1553318008);
        assert_eq!(CDC_GEAR[255], 854125182);
    }

    /// Parameters derived from an average size of 1024.
    #[test]
    fn test_alg_cdc_params_default() {
        let (mi, ma, cs, mask_s, mask_l) = alg_cdc_params(AVG);
        assert_eq!(mi, 256, "min_size");
        assert_eq!(ma, 8192, "max_size");
        assert_eq!(cs, 640, "center_size");
        assert_eq!(mask_s, 2047, "mask_s = (1 << 11) - 1");
        assert_eq!(mask_l, 511, "mask_l = (1 << 9) - 1");
    }

    /// A buffer shorter than the minimum size is returned whole.
    #[test]
    fn test_alg_cdc_offset_small_buffer() {
        let zeros = [0u8; 100];
        let (mi, ma, cs, mask_s, mask_l) = alg_cdc_params(AVG);
        assert_eq!(alg_cdc_offset(&zeros, mi, ma, cs, mask_s, mask_l), 100);
    }

    /// The cut point stays within the minimum and maximum sizes.
    #[test]
    fn test_alg_cdc_offset_returns_at_most_max() {
        let (mi, ma, cs, mask_s, mask_l) = alg_cdc_params(AVG);
        let filler = vec![0xAA; 10000];
        let offset = alg_cdc_offset(&filler, mi, ma, cs, mask_s, mask_l);
        assert!(offset <= ma, "offset {offset} exceeds max_size {ma}");
        assert!(offset >= mi, "offset {offset} below min_size {mi}");
    }

    /// Empty input produces exactly one empty chunk.
    #[test]
    fn test_alg_cdc_chunks_empty() {
        let chunks = alg_cdc_chunks(b"", false, AVG);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].len(), 0);
    }

    /// Input shorter than the minimum size stays in a single chunk.
    #[test]
    fn test_alg_cdc_chunks_small_data() {
        let payload = [42u8; 100];
        let chunks = alg_cdc_chunks(&payload, false, AVG);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].len(), 100);
    }

    /// Concatenating the chunks restores the input.
    #[test]
    fn test_alg_cdc_chunks_reassembly() {
        let data = cycling_bytes(4096);
        let chunks = alg_cdc_chunks(&data, false, AVG);
        assert_eq!(reassemble(&chunks), data);
    }

    /// Two runs over the same input produce the same chunks.
    #[test]
    fn test_alg_cdc_chunks_deterministic() {
        let data = cycling_bytes(4096);
        let first_run = alg_cdc_chunks(&data, false, AVG);
        let second_run = alg_cdc_chunks(&data, false, AVG);
        assert_eq!(first_run.len(), second_run.len());
        for (first, second) in first_run.iter().zip(&second_run) {
            assert_eq!(first, second);
        }
    }

    /// Input longer than the maximum size yields more than one chunk.
    #[test]
    fn test_alg_cdc_chunks_multiple_chunks() {
        let data = cycling_bytes(8192);
        let chunks = alg_cdc_chunks(&data, false, AVG);
        assert!(
            chunks.len() > 1,
            "expected multiple chunks, got {}",
            chunks.len()
        );
    }

    /// UTF-32 mode still returns all bytes of an input shorter than four bytes.
    #[test]
    fn test_alg_cdc_chunks_utf32_small_buffer() {
        let data = [0xAA, 0xBB, 0xCC];
        let chunks = alg_cdc_chunks(&data, true, AVG);
        assert!(!chunks.is_empty(), "must return at least one chunk");
        assert_eq!(reassemble(&chunks), data);
    }

    /// UTF-32 mode keeps a four-byte input as one chunk.
    #[test]
    fn test_alg_cdc_chunks_utf32_exact_4_bytes() {
        let data = [0x01, 0x02, 0x03, 0x04];
        let chunks = alg_cdc_chunks(&data, true, AVG);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0], &data[..]);
    }

    /// UTF-32 mode returns all bytes of an input that is not 4-byte aligned.
    #[test]
    fn test_alg_cdc_chunks_utf32_7_bytes() {
        let data = [0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70];
        let chunks = alg_cdc_chunks(&data, true, AVG);
        assert!(!chunks.is_empty(), "must return at least one chunk");
        assert_eq!(reassemble(&chunks), data);
    }

    /// UTF-32 mode restores the input and aligns every chunk but the last.
    #[test]
    fn test_alg_cdc_chunks_utf32_reassembly() {
        let data = cycling_bytes(4096);
        assert_eq!(data.len() % 4, 0, "test data must be 4-byte aligned");
        let chunks = alg_cdc_chunks(&data, true, AVG);
        assert_eq!(reassemble(&chunks), data);
        // The final chunk is exempt from the alignment requirement.
        if let Some((_last, leading)) = chunks.split_last() {
            for (i, chunk) in leading.iter().enumerate() {
                assert_eq!(
                    chunk.len() % 4,
                    0,
                    "chunk {i} has length {} which is not 4-byte aligned",
                    chunk.len()
                );
            }
        }
    }

    /// Empty input in UTF-32 mode also produces one empty chunk.
    #[test]
    fn test_alg_cdc_chunks_utf32_empty() {
        let chunks = alg_cdc_chunks(b"", true, AVG);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].len(), 0);
    }
}