1use std::cmp::min;
2use std::io::{Read, Seek, SeekFrom};
3
4use bytes::Bytes;
5use more_asserts::{debug_assert_ge, debug_assert_le};
6
7use super::Chunk;
8use super::constants::{MAXIMUM_CHUNK_MULTIPLIER, MINIMUM_CHUNK_DIVISOR, TARGET_CHUNK_SIZE};
9
10lazy_static::lazy_static! {
11 pub static ref MAX_CHUNK_SIZE: usize = (*TARGET_CHUNK_SIZE) * (*MAXIMUM_CHUNK_MULTIPLIER);
13}
14
15pub struct Chunker {
18 hash: gearhash::Hasher<'static>,
20 minimum_chunk: usize,
21 maximum_chunk: usize,
22 mask: u64,
23
24 chunkbuf: Vec<u8>,
26}
27
28impl Default for Chunker {
29 fn default() -> Self {
30 Self::new(*TARGET_CHUNK_SIZE)
31 }
32}
33
34impl Chunker {
35 pub fn new(target_chunk_size: usize) -> Self {
36 assert_eq!(target_chunk_size.count_ones(), 1);
37
38 assert!(target_chunk_size > 64);
41
42 assert!(target_chunk_size < u32::MAX as usize);
45
46 let mask = (target_chunk_size - 1) as u64;
47
48 let mask = mask << mask.leading_zeros();
52 let minimum_chunk = target_chunk_size / *MINIMUM_CHUNK_DIVISOR;
53 let maximum_chunk = target_chunk_size * *MAXIMUM_CHUNK_MULTIPLIER;
54
55 assert!(maximum_chunk > minimum_chunk);
56
57 let hash = gearhash::Hasher::default();
58
59 Chunker {
60 hash,
61 minimum_chunk,
62 maximum_chunk,
63 mask,
64 chunkbuf: Vec::with_capacity(maximum_chunk),
66 }
67 }
68
69 fn new_with_min(target_chunk_size: usize, min_chunk_size: usize) -> Self {
72 let mut chunker = Self::new(target_chunk_size);
73 chunker.minimum_chunk = min_chunk_size;
74 chunker
75 }
76
77 #[inline]
81 pub fn next_boundary(&mut self, data: &[u8]) -> Option<usize> {
82 const HASH_WINDOW_SIZE: usize = 64;
83 let n_bytes = data.len();
84
85 if n_bytes == 0 {
86 return None;
87 }
88
89 let previous_len = self.chunkbuf.len();
90 let mut cur_index = 0;
91 let mut create_chunk = false;
92
93 if previous_len + HASH_WINDOW_SIZE < self.minimum_chunk {
97 let skip = min(self.minimum_chunk - previous_len - HASH_WINDOW_SIZE - 1, n_bytes);
98 cur_index += skip;
99 }
100
101 let read_end = n_bytes.min(cur_index + self.maximum_chunk - previous_len);
104
105 loop {
106 if let Some(next_match) = self.hash.next_match(&data[cur_index..read_end], self.mask) {
107 cur_index += next_match;
108
109 if cur_index + previous_len < self.minimum_chunk {
119 continue;
120 }
121
122 create_chunk = true;
123 } else {
124 cur_index = read_end;
125 }
126
127 break;
128 }
129
130 if cur_index + previous_len >= self.maximum_chunk {
132 cur_index = self.maximum_chunk - previous_len;
133 create_chunk = true;
134 }
135
136 if create_chunk {
137 self.hash.set_hash(0); debug_assert_ge!(cur_index + previous_len, self.minimum_chunk);
139 debug_assert_le!(cur_index + previous_len, self.maximum_chunk);
140 Some(cur_index)
141 } else {
142 None
143 }
144 }
145
146 fn reset_state(&mut self) {
147 self.hash.set_hash(0);
151 debug_assert!(self.chunkbuf.is_empty());
152 }
153
154 pub fn next(&mut self, data: &[u8], is_final: bool) -> (Option<Chunk>, usize) {
161 let (chunk_data, consume): (Bytes, usize) = {
162 if let Some(next_boundary) = self.next_boundary(data) {
163 if self.chunkbuf.is_empty() {
164 (Bytes::copy_from_slice(&data[..next_boundary]), next_boundary)
165 } else {
166 self.chunkbuf.extend_from_slice(&data[..next_boundary]);
167 (std::mem::take(&mut self.chunkbuf).into(), next_boundary)
168 }
169 } else if is_final {
170 let r = if self.chunkbuf.is_empty() {
172 (Bytes::copy_from_slice(data), data.len())
173 } else {
174 self.chunkbuf.extend_from_slice(data);
175 (std::mem::take(&mut self.chunkbuf).into(), data.len())
176 };
177
178 if is_final {
179 self.reset_state();
180 }
181
182 r
183 } else {
184 self.chunkbuf.extend_from_slice(data);
185 return (None, data.len());
186 }
187 };
188
189 if chunk_data.is_empty() {
191 return (None, 0);
192 }
193
194 (Some(Chunk::new(chunk_data)), consume)
195 }
196
197 pub fn next_block(&mut self, data: &[u8], is_final: bool) -> Vec<Chunk> {
200 let mut ret = Vec::new();
201
202 let mut pos = 0;
203 loop {
204 debug_assert!(pos <= data.len());
205 if pos == data.len() {
206 if is_final {
207 self.reset_state();
208 }
209
210 return ret;
211 }
212
213 let (maybe_chunk, bytes_consumed) = self.next(&data[pos..], is_final);
214
215 if let Some(chunk) = maybe_chunk {
216 ret.push(chunk);
217 }
218
219 pos += bytes_consumed;
220 }
221 }
222
223 pub fn next_block_bytes(&mut self, data: &Bytes, is_final: bool) -> Vec<Chunk> {
231 let mut ret = Vec::new();
232
233 let mut pos = 0;
234
235 if !self.chunkbuf.is_empty() {
238 let (maybe_chunk, skip_idx) = self.next(data, is_final);
239 if let Some(chunk) = maybe_chunk {
240 ret.push(chunk);
241 }
242 pos = skip_idx;
243 }
244
245 while pos < data.len() {
246 let maybe_next_boundary = self.next_boundary(&data[pos..]);
247
248 if let Some(chunk_size) = maybe_next_boundary {
249 let next_pos = pos + chunk_size;
250 ret.push(Chunk::new(data.slice(pos..next_pos)));
251 pos = next_pos;
252 } else {
253 if is_final {
255 ret.push(Chunk::new(data.slice(pos..)));
256 } else {
257 self.chunkbuf.extend_from_slice(&data[pos..]);
258 }
259 break;
260 }
261 }
262
263 if is_final {
264 self.reset_state();
265 }
266
267 ret
268 }
269
270 pub fn finish(&mut self) -> Option<Chunk> {
272 self.next(&[], true).0
273 }
274}
275
276pub fn find_partitions<R: Read + Seek>(
289 reader: &mut R,
290 file_size: usize,
291 target_chunk_size: usize,
292 min_partition_size: usize,
293 partition_scan_bytes: usize,
294) -> std::io::Result<Vec<usize>> {
295 assert!(min_partition_size > 0);
296 let mut partitions: Vec<usize> = Vec::new();
297 partitions.push(0);
298 let minimum_chunk = target_chunk_size / *MINIMUM_CHUNK_DIVISOR;
302 let maximum_chunk = target_chunk_size * *MAXIMUM_CHUNK_MULTIPLIER;
303
304 assert!(minimum_chunk > 64);
305
306 if maximum_chunk >= min_partition_size {
307 return Ok(partitions);
308 }
309 let mut buf = vec![0u8; partition_scan_bytes];
310 let mut curpos: usize = 0;
311 while curpos < file_size {
319 curpos += min_partition_size;
320 if curpos + min_partition_size >= file_size || curpos + partition_scan_bytes >= file_size {
323 break;
324 }
325 reader.seek(SeekFrom::Start(curpos as u64))?;
327 reader.read_exact(&mut buf)?;
328 let mut chunker = Chunker::new_with_min(target_chunk_size, 0);
329 let chunks = chunker.next_block(&buf, false);
332 if chunks.is_empty() {
333 continue;
334 }
335 let mut offset = chunks[0].data.len();
337 offset += chunks[1].data.len();
338 for i in 2..chunks.len() {
339 let cprev = chunks[i - 1].data.len();
340 let c = chunks[i].data.len();
341 offset += chunks[i].data.len();
342 if cprev > minimum_chunk
343 && cprev < maximum_chunk - minimum_chunk
344 && c > minimum_chunk
345 && c < maximum_chunk - minimum_chunk
346 {
347 partitions.push(curpos + offset);
349 break;
350 }
351 }
352 }
353 Ok(partitions)
354}
355
356#[cfg(test)]
357mod tests {
358 use std::collections::HashSet;
359 use std::io::Cursor;
360
361 use rand::rngs::StdRng;
362 use rand::{Rng, SeedableRng};
363
364 use super::*;
365
366 fn make_test_data(seed: u64, len: usize) -> Vec<u8> {
369 let mut rng = StdRng::seed_from_u64(seed);
370 let mut data = vec![0; len];
371 rng.fill(&mut data[..]);
372 data
373 }
374
375 fn check_chunks_equal(chunks: &[Chunk], data: &[u8]) {
376 let mut new_vec = Vec::with_capacity(10000);
378 for c in chunks.iter() {
379 new_vec.extend_from_slice(&c.data[..]);
380 }
381
382 assert!(new_vec == data);
383 }
384
385 #[derive(Default)]
389 struct ChunkerTestWrapper {
390 chunker_chunks: Chunker,
391 chunker_bytes: Chunker,
392 }
393
394 impl ChunkerTestWrapper {
395 fn new(target_chunk_size: usize) -> Self {
396 ChunkerTestWrapper {
397 chunker_chunks: Chunker::new(target_chunk_size),
398 chunker_bytes: Chunker::new(target_chunk_size),
399 }
400 }
401
402 fn next_block(&mut self, data: &[u8], is_final: bool) -> Vec<Chunk> {
403 let chunks = self.chunker_chunks.next_block(data, is_final);
404 let bytes_chunks = self.chunker_bytes.next_block_bytes(&Bytes::copy_from_slice(data), is_final);
405
406 assert_eq!(chunks.len(), bytes_chunks.len());
408 for (c1, c2) in chunks.iter().zip(bytes_chunks.iter()) {
409 assert_eq!(c1.data, c2.data);
410 }
411
412 chunks
413 }
414
415 fn next_chunk(&mut self, data: &[u8], is_final: bool) -> (Option<Chunk>, usize) {
416 let (chunk, consumed) = self.chunker_chunks.next(data, is_final);
417 let (bytes_chunk, bytes_consumed) = self.chunker_bytes.next(&Bytes::copy_from_slice(data), is_final);
418
419 if let Some(c) = &chunk {
421 assert_eq!(c.data, bytes_chunk.unwrap().data);
422 } else {
423 assert!(bytes_chunk.is_none());
424 }
425
426 (chunk, consumed.max(bytes_consumed))
427 }
428 }
429
430 #[test]
431 fn test_empty_data_no_chunk_until_final() {
432 let mut chunker = ChunkerTestWrapper::new(128);
433
434 let (chunk, consumed) = chunker.next_chunk(&[], false);
436 assert!(chunk.is_none());
437 assert_eq!(consumed, 0);
438
439 let (chunk, consumed) = chunker.next_chunk(&[], true);
441 assert!(chunk.is_none());
442 assert_eq!(consumed, 0);
443 }
444
445 #[test]
446 fn test_data_smaller_than_minimum_no_boundary() {
447 let mut chunker = ChunkerTestWrapper::new(128);
448
449 let data = make_test_data(0, 63);
451
452 let (chunk, consumed) = chunker.next_chunk(&data, false);
455 assert!(chunk.is_none());
456 assert_eq!(consumed, data.len());
457
458 let (chunk, consumed) = chunker.next_chunk(&[], true);
460 assert!(chunk.is_some());
461 assert_eq!(consumed, 0);
462
463 let chunk = chunk.unwrap();
464 assert_eq!(chunk.data.len(), 63);
465 assert_eq!(&chunk.data[..], &data[..], "Chunk should contain exactly what was passed in");
466 }
467
468 #[test]
469 fn test_multiple_chunks_produced() {
470 let mut chunker = ChunkerTestWrapper::new(128);
471
472 let data = make_test_data(42, 10000);
474
475 let chunks = chunker.next_block(&data, true);
477 assert!(!chunks.is_empty());
478
479 check_chunks_equal(&chunks, &data);
480 }
481
482 #[test]
483 fn test_repeated_calls_partial_consumption() {
484 let data = make_test_data(42, 10000);
487
488 let mut chunks_1 = Vec::new();
489
490 let mut pos = 0;
491 let mut chunker = ChunkerTestWrapper::new(128);
492
493 while pos < data.len() {
494 for i in 0..16 {
495 let next_pos = (pos + i).min(data.len());
496 chunks_1.append(&mut chunker.next_block(&data[pos..next_pos], next_pos == data.len()));
497 pos = next_pos;
498 }
499 }
500
501 check_chunks_equal(&chunks_1, &data);
502
503 let chunks_2 = ChunkerTestWrapper::new(128).next_block(&data, true);
505
506 assert_eq!(chunks_1, chunks_2);
507 }
508
509 #[test]
510 fn test_exact_maximum_chunk() {
511 let mut chunker = ChunkerTestWrapper::new(512);
515
516 let data = vec![0; 8 * *MAXIMUM_CHUNK_MULTIPLIER * 512];
518
519 let chunks = chunker.next_block(&data, true);
520
521 assert_eq!(chunks.len(), 8);
522
523 for c in chunks.iter() {
524 assert_eq!(c.data.len(), *MAXIMUM_CHUNK_MULTIPLIER * 512);
525 }
526 }
527
528 #[test]
529 fn test_partition() {
530 for _i in 1..5 {
531 let data = make_test_data(42, 1000000);
532 let mut chunker = Chunker::new(1024);
533 let chunks = chunker.next_block(&data, true);
534 let mut chunk_offsets = HashSet::new();
535 let mut offset = 0;
536 eprintln!("{:?}", chunker.minimum_chunk);
537 for i in 0..chunks.len() {
538 chunk_offsets.insert(offset);
539 offset += chunks[i].data.len();
540 }
541
542 let partitions =
543 find_partitions(&mut Cursor::new(&mut data.as_slice()), data.len(), 1024, 100000, 10000).unwrap();
544 assert!(partitions.len() > 1);
545 for i in 0..partitions.len() {
546 assert!(chunk_offsets.contains(&partitions[i]));
547 }
548 }
549 }
550
551 fn splitmix64_next(state: &mut u64) -> u64 {
554 *state = state.wrapping_add(0x9E3779B97F4A7C15);
555 let mut z = *state;
556 z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9);
557 z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB);
558 z ^ (z >> 31)
559 }
560
561 fn create_random_data(n: usize, seed: u64) -> Vec<u8> {
562 let mut ret = Vec::with_capacity(n + 7);
566
567 let mut state = seed;
568
569 while ret.len() < n {
570 let next_u64 = splitmix64_next(&mut state);
571 ret.extend_from_slice(&next_u64.to_le_bytes());
572 }
573
574 ret.resize(n, 0);
576
577 ret
578 }
579
580 fn get_chunk_boundaries(chunks: &[Chunk]) -> Vec<usize> {
581 chunks
582 .iter()
583 .scan(0, |state, chunk| {
584 *state += chunk.data.len();
585 Some(*state)
586 })
587 .collect()
588 }
589
590 #[test]
591 fn test_chunk_boundaries() {
592 let data = create_random_data(256000, 1);
593
594 let chunks = ChunkerTestWrapper::default().next_block(&data, true);
596
597 let ref_chunk_boundaries: Vec<usize> = get_chunk_boundaries(&chunks);
599
600 for add_size in [1, 37, 255] {
602 let mut chunker = Chunker::default();
603
604 let mut alt_chunks = Vec::with_capacity(chunks.len());
606
607 let mut pos = 0;
608 while pos < data.len() {
609 let next_pos = (pos + add_size).min(data.len());
610 let next_chunk = chunker.next_block(&data[pos..next_pos], next_pos == data.len());
611 alt_chunks.extend(next_chunk);
612 pos = next_pos;
613 }
614
615 let alt_boundaries = get_chunk_boundaries(&alt_chunks);
616
617 assert_eq!(alt_boundaries, ref_chunk_boundaries);
618 }
619 }
620
621 #[test]
622 fn test_correctness_1mb_random_data() {
623 let data = create_random_data(1000000, 0);
625
626 assert_eq!(data[0], 175);
632 assert_eq!(data[127], 132);
633 assert_eq!(data[111111], 118);
634
635 let chunks = ChunkerTestWrapper::default().next_block(&data, true);
637
638 let chunk_boundaries: Vec<usize> = get_chunk_boundaries(&chunks);
640
641 assert_eq!(
644 chunk_boundaries,
645 vec![
646 84493, 134421, 144853, 243318, 271793, 336457, 467529, 494581, 582000, 596735, 616815, 653164, 678202,
647 724510, 815591, 827760, 958832, 991092, 1000000
648 ]
649 );
650 }
651
652 #[test]
653 fn test_correctness_1mb_const_data() {
654 let data = vec![59u8; 1000000];
656
657 let chunks = ChunkerTestWrapper::default().next_block(&data, true);
659
660 let chunk_boundaries: Vec<usize> = get_chunk_boundaries(&chunks);
662
663 assert_eq!(chunk_boundaries, vec![131072, 262144, 393216, 524288, 655360, 786432, 917504, 1000000])
666 }
667
668 fn get_triggering_base_data(n: usize, padding: usize) -> Vec<u8> {
669 let mut data = vec![
672 154, 52, 42, 34, 159, 75, 126, 224, 70, 236, 12, 196, 79, 236, 178, 124, 127, 50, 99, 178, 44, 176, 174,
673 126, 250, 235, 205, 174, 252, 122, 35, 10, 20, 101, 214, 69, 193, 8, 115, 105, 158, 228, 120, 111, 136,
674 162, 198, 251, 211, 183, 253, 252, 164, 147, 63, 16, 186, 162, 117, 23, 170, 36, 205, 187, 174, 76, 210,
675 174, 211, 175, 12, 173, 145, 59, 2, 70, 222, 181, 159, 227, 182, 156, 189, 51, 226, 106, 24, 50, 183, 157,
676 140, 10, 8, 23, 212, 70, 10, 234, 23, 33, 219, 254, 39, 236, 70, 49, 191, 116, 9, 115, 15, 101, 26, 159,
677 165, 220, 15, 170, 56, 125, 92, 163, 94, 235, 38, 40, 49, 81,
678 ];
679
680 data.resize(data.len() + padding, 0u8);
682
683 while data.len() < n {
685 let n_take = (n - data.len()).min(data.len());
686 data.extend_from_within(0..n_take);
687 }
688
689 data
690 }
691
692 #[test]
693 fn test_correctness_100kb_hitting_data() {
694 let mut data_sample_at_11111 = [0u8; 128];
700 let mut ref_cb = vec![Vec::new(); 128];
701
702 data_sample_at_11111[0] = 236;
703 ref_cb[0] = vec![8256, 16448, 24640, 32832, 41024, 49216, 57408, 65536];
704 data_sample_at_11111[1] = 50;
705 ref_cb[1] = vec![8320, 16576, 24832, 33088, 41344, 49600, 57856, 65536];
706 data_sample_at_11111[2] = 36;
707 ref_cb[2] = vec![8254, 16574, 24894, 33214, 41534, 49854, 58174, 65536];
708 data_sample_at_11111[3] = 116;
709 ref_cb[3] = vec![8317, 16570, 24823, 33076, 41329, 49582, 57835, 65536];
710 data_sample_at_11111[4] = 126;
711 ref_cb[4] = vec![8248, 16564, 24880, 33196, 41512, 49828, 58144, 65536];
712 data_sample_at_11111[5] = 145;
713 ref_cb[5] = vec![8310, 16556, 24802, 33048, 41294, 49540, 57786, 65536];
714 data_sample_at_11111[6] = 235;
715 ref_cb[6] = vec![8238, 16546, 24854, 33162, 41470, 49778, 58086, 65536];
716 data_sample_at_11111[7] = 228;
717 ref_cb[7] = vec![8299, 16534, 24769, 33004, 41239, 49474, 57709, 65536];
718 data_sample_at_11111[8] = 70;
719 ref_cb[8] = vec![8224, 16520, 24816, 33112, 41408, 49704, 58000, 65536];
720 data_sample_at_11111[9] = 178;
721 ref_cb[9] = vec![8284, 16504, 24724, 32944, 41164, 49384, 57604, 65536];
722 data_sample_at_11111[10] = 173;
723 ref_cb[10] = vec![8206, 16486, 24766, 33046, 41326, 49606, 57886, 65536];
724 data_sample_at_11111[11] = 0;
725 ref_cb[11] = vec![8265, 16466, 24667, 32868, 41069, 49270, 57471, 65536];
726 data_sample_at_11111[12] = 252;
727 ref_cb[12] = vec![8324, 16584, 24844, 33104, 41364, 49624, 57884, 65536];
728 data_sample_at_11111[13] = 159;
729 ref_cb[13] = vec![8242, 16561, 24880, 33199, 41518, 49837, 58156, 65536];
730 data_sample_at_11111[14] = 69;
731 ref_cb[14] = vec![8300, 16536, 24772, 33008, 41244, 49480, 57716, 65536];
732 data_sample_at_11111[15] = 219;
733 ref_cb[15] = vec![8215, 16509, 24803, 33097, 41391, 49685, 57979, 65536];
734 data_sample_at_11111[16] = 126;
735 ref_cb[16] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536];
736 data_sample_at_11111[17] = 10;
737 ref_cb[17] = vec![8329, 16594, 24859, 33124, 41389, 49654, 57919, 65536];
738 data_sample_at_11111[18] = 124;
739 ref_cb[18] = vec![8240, 16562, 24884, 33206, 41528, 49850, 58172, 65536];
740 data_sample_at_11111[19] = 24;
741 ref_cb[19] = vec![8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536];
742 data_sample_at_11111[20] = 196;
743 ref_cb[20] = vec![8204, 16492, 24780, 33068, 41356, 49644, 57932, 65536];
744 data_sample_at_11111[21] = 106;
745 ref_cb[21] = vec![8259, 16454, 24649, 32844, 41039, 49234, 57429, 65536];
746 data_sample_at_11111[22] = 196;
747 ref_cb[22] = vec![8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536];
748 data_sample_at_11111[23] = 183;
749 ref_cb[23] = vec![8218, 16523, 24828, 33133, 41438, 49743, 58048, 65536];
750 data_sample_at_11111[24] = 124;
751 ref_cb[24] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536];
752 data_sample_at_11111[25] = 70;
753 ref_cb[25] = vec![8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536];
754 data_sample_at_11111[26] = 126;
755 ref_cb[26] = vec![8226, 16542, 24858, 33174, 41490, 49806, 58122, 65536];
756 data_sample_at_11111[27] = 191;
757 ref_cb[27] = vec![8279, 16494, 24709, 32924, 41139, 49354, 57569, 65536];
758 data_sample_at_11111[28] = 69;
759 ref_cb[28] = vec![8332, 16600, 24868, 33136, 41404, 49672, 57940, 65536];
760 data_sample_at_11111[29] = 163;
761 ref_cb[29] = vec![8228, 16549, 24870, 33191, 41512, 49833, 58154, 65536];
762 data_sample_at_11111[30] = 252;
763 ref_cb[30] = vec![8280, 16496, 24712, 32928, 41144, 49360, 57576, 65536];
764 data_sample_at_11111[31] = 0;
765 ref_cb[31] = vec![8332, 16600, 24868, 33136, 41404, 49672, 57940, 65536];
766 data_sample_at_11111[32] = 173;
767 ref_cb[32] = vec![8224, 16544, 24864, 33184, 41504, 49824, 58144, 65536];
768 data_sample_at_11111[33] = 42;
769 ref_cb[33] = vec![8275, 16486, 24697, 32908, 41119, 49330, 57541, 65536];
770 data_sample_at_11111[34] = 70;
771 ref_cb[34] = vec![8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536];
772 data_sample_at_11111[35] = 174;
773 ref_cb[35] = vec![8214, 16527, 24840, 33153, 41466, 49779, 58092, 65536];
774 data_sample_at_11111[36] = 235;
775 ref_cb[36] = vec![8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536];
776 data_sample_at_11111[37] = 186;
777 ref_cb[37] = vec![8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536];
778 data_sample_at_11111[38] = 0;
779 ref_cb[38] = vec![8198, 16498, 24798, 33098, 41398, 49698, 57998, 65536];
780 data_sample_at_11111[39] = 157;
781 ref_cb[39] = vec![8247, 16597, 24947, 33297, 41647, 49997, 58347, 65536];
782 data_sample_at_11111[40] = 126;
783 ref_cb[40] = vec![8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536];
784 data_sample_at_11111[41] = 49;
785 ref_cb[41] = vec![8345, 16626, 24907, 33188, 41469, 49750, 58031, 65536];
786 data_sample_at_11111[42] = 36;
787 ref_cb[42] = vec![8224, 16554, 24884, 33214, 41544, 49874, 58204, 65536];
788 data_sample_at_11111[43] = 0;
789 ref_cb[43] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536];
790 data_sample_at_11111[44] = 236;
791 ref_cb[44] = vec![8320, 16576, 24832, 33088, 41344, 49600, 57856, 65536];
792 data_sample_at_11111[45] = 105;
793 ref_cb[45] = vec![8195, 16499, 24803, 33107, 41411, 49715, 58019, 65536];
794 data_sample_at_11111[46] = 0;
795 ref_cb[46] = vec![8242, 16594, 24946, 33298, 41650, 50002, 58354, 65536];
796 data_sample_at_11111[47] = 24;
797 ref_cb[47] = vec![8289, 16514, 24739, 32964, 41189, 49414, 57639, 65536];
798 data_sample_at_11111[48] = 126;
799 ref_cb[48] = vec![8336, 16608, 24880, 33152, 41424, 49696, 57968, 65536];
800 data_sample_at_11111[49] = 0;
801 ref_cb[49] = vec![8206, 16525, 24844, 33163, 41482, 49801, 58120, 65536];
802 data_sample_at_11111[50] = 70;
803 ref_cb[50] = vec![8252, 16618, 24984, 33350, 41716, 50082, 58448, 65536];
804 data_sample_at_11111[51] = 236;
805 ref_cb[51] = vec![8298, 16532, 24766, 33000, 41234, 49468, 57702, 65536];
806 data_sample_at_11111[52] = 0;
807 ref_cb[52] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536];
808 data_sample_at_11111[53] = 12;
809 ref_cb[53] = vec![8209, 16535, 24861, 33187, 41513, 49839, 58165, 65536];
810 data_sample_at_11111[54] = 236;
811 ref_cb[54] = vec![8254, 16626, 24998, 33370, 41742, 50114, 58486, 65536];
812 data_sample_at_11111[55] = 0;
813 ref_cb[55] = vec![8299, 16534, 24769, 33004, 41239, 49474, 57709, 65536];
814 data_sample_at_11111[56] = 173;
815 ref_cb[56] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536];
816 data_sample_at_11111[57] = 196;
817 ref_cb[57] = vec![8204, 16529, 24854, 33179, 41504, 49829, 58154, 65536];
818 data_sample_at_11111[58] = 0;
819 ref_cb[58] = vec![8248, 16618, 24988, 33358, 41728, 50098, 58468, 65536];
820 data_sample_at_11111[59] = 159;
821 ref_cb[59] = vec![8292, 16520, 24748, 32976, 41204, 49432, 57660, 65536];
822 data_sample_at_11111[60] = 178;
823 ref_cb[60] = vec![8336, 16608, 24880, 33152, 41424, 49696, 57968, 65536];
824 data_sample_at_11111[61] = 0;
825 ref_cb[61] = vec![8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536];
826 data_sample_at_11111[62] = 10;
827 ref_cb[62] = vec![8234, 16594, 24954, 33314, 41674, 50034, 58394, 65536];
828 data_sample_at_11111[63] = 101;
829 ref_cb[63] = vec![8277, 16490, 24703, 32916, 41129, 49342, 57555, 65536];
830 data_sample_at_11111[64] = 0;
831 ref_cb[64] = vec![8320, 16576, 24832, 33088, 41344, 49600, 57856, 65536];
832 data_sample_at_11111[65] = 15;
833 ref_cb[65] = vec![8363, 16662, 24961, 33260, 41559, 49858, 58157, 65536];
834 data_sample_at_11111[66] = 147;
835 ref_cb[66] = vec![8212, 16554, 24896, 33238, 41580, 49922, 58264, 65536];
836 data_sample_at_11111[67] = 0;
837 ref_cb[67] = vec![8254, 16639, 25024, 33409, 41794, 50179, 58564, 65536];
838 data_sample_at_11111[68] = 0;
839 ref_cb[68] = vec![8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536];
840 data_sample_at_11111[69] = 227;
841 ref_cb[69] = vec![8338, 16612, 24886, 33160, 41434, 49708, 57982, 65536];
842 data_sample_at_11111[70] = 126;
843 ref_cb[70] = vec![8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536];
844 data_sample_at_11111[71] = 0;
845 ref_cb[71] = vec![8223, 16581, 24939, 33297, 41655, 50013, 58371, 65536];
846 data_sample_at_11111[72] = 101;
847 ref_cb[72] = vec![8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536];
848 data_sample_at_11111[73] = 186;
849 ref_cb[73] = vec![8305, 16546, 24787, 33028, 41269, 49510, 57751, 65536];
850 data_sample_at_11111[74] = 52;
851 ref_cb[74] = vec![8346, 16628, 24910, 33192, 41474, 49756, 58038, 65536];
852 data_sample_at_11111[75] = 0;
853 ref_cb[75] = vec![8387, 16710, 25033, 33356, 41679, 50002, 58325, 65536];
854 data_sample_at_11111[76] = 70;
855 ref_cb[76] = vec![8224, 16588, 24952, 33316, 41680, 50044, 58408, 65536];
856 data_sample_at_11111[77] = 228;
857 ref_cb[77] = vec![8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536];
858 data_sample_at_11111[78] = 0;
859 ref_cb[78] = vec![8304, 16544, 24784, 33024, 41264, 49504, 57744, 65536];
860 data_sample_at_11111[79] = 0;
861 ref_cb[79] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536];
862 data_sample_at_11111[80] = 50;
863 ref_cb[80] = vec![8384, 16704, 25024, 33344, 41664, 49984, 58304, 65536];
864 data_sample_at_11111[81] = 214;
865 ref_cb[81] = vec![8215, 16575, 24935, 33295, 41655, 50015, 58375, 65536];
866 data_sample_at_11111[82] = 0;
867 ref_cb[82] = vec![8254, 16654, 25054, 33454, 41854, 50254, 58654, 65536];
868 data_sample_at_11111[83] = 0;
869 ref_cb[83] = vec![8293, 16522, 24751, 32980, 41209, 49438, 57667, 65536];
870 data_sample_at_11111[84] = 50;
871 ref_cb[84] = vec![8332, 16600, 24868, 33136, 41404, 49672, 57940, 65536];
872 data_sample_at_11111[85] = 69;
873 ref_cb[85] = vec![8371, 16678, 24985, 33292, 41599, 49906, 58213, 65536];
874 data_sample_at_11111[86] = 0;
875 ref_cb[86] = vec![8196, 16542, 24888, 33234, 41580, 49926, 58272, 65536];
876 data_sample_at_11111[87] = 0;
877 ref_cb[87] = vec![8234, 16619, 25004, 33389, 41774, 50159, 58544, 65536];
878 data_sample_at_11111[88] = 70;
879 ref_cb[88] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536];
880 data_sample_at_11111[89] = 136;
881 ref_cb[89] = vec![8310, 16556, 24802, 33048, 41294, 49540, 57786, 65536];
882 data_sample_at_11111[90] = 0;
883 ref_cb[90] = vec![8348, 16632, 24916, 33200, 41484, 49768, 58052, 65536];
884 data_sample_at_11111[91] = 0;
885 ref_cb[91] = vec![8386, 16708, 25030, 33352, 41674, 49996, 58318, 65536];
886 data_sample_at_11111[92] = 101;
887 ref_cb[92] = vec![8204, 16564, 24924, 33284, 41644, 50004, 58364, 65536];
888 data_sample_at_11111[93] = 36;
889 ref_cb[93] = vec![8241, 16639, 25037, 33435, 41833, 50231, 58629, 65536];
890 data_sample_at_11111[94] = 196;
891 ref_cb[94] = vec![8278, 16492, 24706, 32920, 41134, 49348, 57562, 65536];
892 data_sample_at_11111[95] = 0;
893 ref_cb[95] = vec![8315, 16566, 24817, 33068, 41319, 49570, 57821, 65536];
894 data_sample_at_11111[96] = 0;
895 ref_cb[96] = vec![8352, 16640, 24928, 33216, 41504, 49792, 58080, 65536];
896 data_sample_at_11111[97] = 24;
897 ref_cb[97] = vec![8389, 16714, 25039, 33364, 41689, 50014, 58339, 65536];
898 data_sample_at_11111[98] = 8;
899 ref_cb[98] = vec![8200, 16562, 24924, 33286, 41648, 50010, 58372, 65536];
900 data_sample_at_11111[99] = 0;
901 ref_cb[99] = vec![8236, 16635, 25034, 33433, 41832, 50231, 58630, 65536];
902 data_sample_at_11111[100] = 0;
903 ref_cb[100] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536];
904 data_sample_at_11111[101] = 125;
905 ref_cb[101] = vec![8308, 16552, 24796, 33040, 41284, 49528, 57772, 65536];
906 data_sample_at_11111[102] = 173;
907 ref_cb[102] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536];
908 data_sample_at_11111[103] = 126;
909 ref_cb[103] = vec![8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536];
910 data_sample_at_11111[104] = 0;
911 ref_cb[104] = vec![8416, 16768, 25120, 33472, 41824, 50176, 58528, 65536];
912 data_sample_at_11111[105] = 0;
913 ref_cb[105] = vec![8219, 16607, 24995, 33383, 41771, 50159, 58547, 65536];
914 data_sample_at_11111[106] = 159;
915 ref_cb[106] = vec![8254, 16678, 25102, 33526, 41950, 50374, 58798, 65536];
916 data_sample_at_11111[107] = 210;
917 ref_cb[107] = vec![8289, 16514, 24739, 32964, 41189, 49414, 57639, 65536];
918 data_sample_at_11111[108] = 178;
919 ref_cb[108] = vec![8324, 16584, 24844, 33104, 41364, 49624, 57884, 65536];
920 data_sample_at_11111[109] = 0;
921 ref_cb[109] = vec![8359, 16654, 24949, 33244, 41539, 49834, 58129, 65536];
922 data_sample_at_11111[110] = 0;
923 ref_cb[110] = vec![8394, 16724, 25054, 33384, 41714, 50044, 58374, 65536];
924 data_sample_at_11111[111] = 170;
925 ref_cb[111] = vec![8429, 16794, 25159, 33524, 41889, 50254, 58619, 65536];
926 data_sample_at_11111[112] = 173;
927 ref_cb[112] = vec![8224, 16624, 25024, 33424, 41824, 50224, 58624, 65536];
928 data_sample_at_11111[113] = 235;
929 ref_cb[113] = vec![8258, 16452, 24646, 32840, 41034, 49228, 57422, 65536];
930 data_sample_at_11111[114] = 0;
931 ref_cb[114] = vec![8292, 16520, 24748, 32976, 41204, 49432, 57660, 65536];
932 data_sample_at_11111[115] = 0;
933 ref_cb[115] = vec![8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536];
934 data_sample_at_11111[116] = 0;
935 ref_cb[116] = vec![8360, 16656, 24952, 33248, 41544, 49840, 58136, 65536];
936 data_sample_at_11111[117] = 24;
937 ref_cb[117] = vec![8394, 16724, 25054, 33384, 41714, 50044, 58374, 65536];
938 data_sample_at_11111[118] = 228;
939 ref_cb[118] = vec![8428, 16792, 25156, 33520, 41884, 50248, 58612, 65536];
940 data_sample_at_11111[119] = 0;
941 ref_cb[119] = vec![8215, 16613, 25011, 33409, 41807, 50205, 58603, 65536];
942 data_sample_at_11111[120] = 0;
943 ref_cb[120] = vec![8248, 16680, 25112, 33544, 41976, 50408, 58840, 65536];
944 data_sample_at_11111[121] = 0;
945 ref_cb[121] = vec![8281, 16498, 24715, 32932, 41149, 49366, 57583, 65536];
946 data_sample_at_11111[122] = 101;
947 ref_cb[122] = vec![8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536];
948 data_sample_at_11111[123] = 174;
949 ref_cb[123] = vec![8347, 16630, 24913, 33196, 41479, 49762, 58045, 65536];
950 data_sample_at_11111[124] = 126;
951 ref_cb[124] = vec![8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536];
952 data_sample_at_11111[125] = 0;
953 ref_cb[125] = vec![8413, 16762, 25111, 33460, 41809, 50158, 58507, 65536];
954 data_sample_at_11111[126] = 0;
955 ref_cb[126] = vec![8192, 16574, 24956, 33338, 41720, 50102, 58484, 65536];
956 data_sample_at_11111[127] = 0;
957 ref_cb[127] = vec![8224, 16639, 25054, 33469, 41884, 50299, 58714, 65536];
958
959 for i in 0..128 {
961 let data = get_triggering_base_data(65536, i);
962
963 assert_eq!(data[11111], data_sample_at_11111[i]);
966
967 let chunks = ChunkerTestWrapper::default().next_block(&data, true);
972
973 let chunk_boundaries: Vec<usize> = get_chunk_boundaries(&chunks);
975
976 assert_eq!(chunk_boundaries, ref_cb[i]);
980 }
981
982 }
985}