1use rand::prelude::SliceRandom;
2use rand::Rng;
3use rand::{RngCore, SeedableRng};
4use rand_pcg::Mcg128Xsl64;
5use std::collections::BTreeSet;
6use std::rc::Rc;
7use std::sync::Mutex;
8
9use crate::position::*;
10
11struct RandWrap {
12 r: Mcg128Xsl64,
13 s: u64,
14}
15
16impl RandWrap {
17 fn new(seed: u64) -> Self {
18 Self {
19 r: Mcg128Xsl64::seed_from_u64(seed),
20 s: seed,
21 }
22 }
23
24 fn set_state_to(&mut self, offset: IndexPos) {
25 let u64_offset = offset.as_u64();
26 self.r = Mcg128Xsl64::seed_from_u64(self.s);
27 self.r.advance(u64_offset as u128);
28 }
29
30 fn next_u64(&mut self) -> u64 {
31 self.r.next_u64()
32 }
33}
34
35pub(crate) trait PatternGenerator {
37 fn setup(&mut self, offset: IndexPos);
38 fn next_u64(&mut self) -> u64;
39}
40
41pub(crate) struct TestBdState {
44 pub s: Rc<Mutex<Box<dyn PatternGenerator>>>,
45}
46
47impl std::clone::Clone for TestBdState {
48 fn clone(&self) -> Self {
49 Self { s: self.s.clone() }
50 }
51}
52
53pub(crate) struct PercentPattern {
54 pub fill: u32,
55 pub duplicates: u32,
56 pub random: u32,
57}
58
59pub(crate) struct DataMix {
60 mapping: Vec<(std::ops::Range<IndexPos>, Bucket)>,
61 r: RandWrap,
62 current: IndexPos,
63 start_range: IndexPos,
64 end_range: IndexPos,
65 cur_bucket: Bucket, dupe_block: [IndexPos; 64],
67}
68
69type PatGen = Box<dyn PatternGenerator>;
70type Segments = Vec<(std::ops::Range<IndexPos>, Bucket)>;
71
72impl DataMix {
73 pub fn create(
74 size: u64,
75 seed: u64,
76 segments: usize,
77 percents: &PercentPattern,
78 ) -> (PatGen, Segments) {
79 assert!(size.is_multiple_of(8));
80
81 assert!(
82 percents.duplicates + percents.fill + percents.random == 100,
83 "Percentages must sum to 100"
84 );
85
86 let mapping = split_range_into_random_subranges_with_buckets(
87 seed,
88 size,
89 segments,
90 (percents.fill, percents.duplicates, percents.random),
91 );
92
93 let mut dupe_block: [IndexPos; 64] = [IndexPos::new(0); 64];
94 for (idx, d) in dupe_block.iter_mut().enumerate() {
95 *d = IndexPos::new(idx as u64);
96 }
97
98 let mapping_copy = mapping.clone();
99
100 (
101 Box::new(Self {
102 mapping,
103 r: RandWrap::new(seed),
104 current: IndexPos::new(u64::MAX),
105 start_range: IndexPos::new(u64::MAX), end_range: IndexPos::new(0),
107 cur_bucket: Bucket::NotValid,
108 dupe_block,
109 }),
110 mapping_copy,
111 )
112 }
113
114 fn find_segment(&self, offset: IndexPos) -> Option<(std::ops::Range<IndexPos>, Bucket)> {
115 let index = match self
116 .mapping
117 .binary_search_by_key(&offset, |(start, _)| start.start)
118 {
119 Ok(i) => i,
120 Err(i) => i,
121 };
122
123 for i in [index, index.wrapping_sub(1)] {
124 if let Some((r, bucket)) = self.mapping.get(i) {
125 if r.start <= offset && offset < r.end {
126 return Some((r.clone(), *bucket));
127 }
128 }
129 }
130 None
131 }
132}
133
134impl PatternGenerator for DataMix {
135 fn setup(&mut self, offset: IndexPos) {
136 if !(self.start_range <= offset && offset < self.end_range) {
140 let (range, bucket) = self.find_segment(offset).unwrap();
141
142 assert!(range.start <= offset && offset < range.end);
143
144 self.start_range = range.start;
145 self.end_range = range.end;
146 self.cur_bucket = bucket;
147 }
148
149 self.r.set_state_to(offset);
152
153 self.current = offset;
156 }
157
158 fn next_u64(&mut self) -> u64 {
159 if self.current >= self.end_range {
161 self.setup(self.current);
162 }
163
164 let v = match self.cur_bucket {
165 Bucket::Fill => 0, Bucket::Random => self.r.next_u64(),
167 Bucket::Duplicate => self.dupe_block[(self.current.as_u64() % 64) as usize].as_u64(),
168 Bucket::NotValid => panic!("Not a valid bucket"),
169 };
170
171 self.current += IndexPos::new(1);
172 v
173 }
174}
175
176use serde::{Deserialize, Serialize};
177
178#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
193pub enum Bucket {
194 Fill,
199
200 Duplicate,
205
206 Random,
212
213 NotValid,
218}
219
220fn bucket_counts(
221 num_buckets: usize,
222 (p_fill, p_dup, p_rand): (u32, u32, u32),
223) -> (usize, usize, usize) {
224 assert_eq!(p_fill + p_dup + p_rand, 100);
225
226 let weights = [
227 ("fill", p_fill as f64),
228 ("dup", p_dup as f64),
229 ("rand", p_rand as f64),
230 ];
231
232 let ideal: Vec<(usize, f64, f64)> = weights
234 .iter()
235 .enumerate()
236 .map(|(idx, (_, w))| {
237 let v = *w * num_buckets as f64 / 100.0;
238 (idx, v.floor(), v.fract())
239 })
240 .collect();
241
242 let mut counts: [usize; 3] = [0; 3];
243 let mut used = 0usize;
244
245 for (idx, floor_val, _) in &ideal {
246 let c = *floor_val as usize;
247 counts[*idx] = c;
248 used += c;
249 }
250
251 let mut remaining = num_buckets - used;
252
253 let mut frac_sorted = ideal.clone();
255 frac_sorted.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap());
256
257 for (idx, _, _) in frac_sorted {
258 if remaining == 0 {
259 break;
260 }
261 counts[idx] += 1;
262 remaining -= 1;
263 }
264
265 (counts[0], counts[1], counts[2])
266}
267
268fn split_range_into_random_subranges_with_buckets(
270 seed: u64,
271 range_size: u64,
272 num_buckets: usize,
273 percentages: (u32, u32, u32),
274) -> Vec<(std::ops::Range<IndexPos>, Bucket)> {
275 assert!(
276 range_size.is_multiple_of(8),
277 "total block device must be a multiple of 8"
278 );
279
280 assert!(
281 num_buckets as u64 <= (range_size / 512),
282 "Cannot split into more subranges than elements in the range"
283 );
284 assert!(
285 percentages.0 + percentages.1 + percentages.2 == 100,
286 "Percentages must sum to 100"
287 );
288
289 let mut rng = Mcg128Xsl64::seed_from_u64(seed);
290 let mut split_points = BTreeSet::new();
291
292 let lo = 512 / 8;
294 let hi = range_size / 8;
295 while split_points.len() < num_buckets - 1 {
296 let point = IndexPos::new(rng.random_range(lo..hi));
297 split_points.insert(point);
298 }
299
300 let split_points: Vec<IndexPos> = split_points.into_iter().collect();
302
303 let mut subranges: Vec<std::ops::Range<IndexPos>> = Vec::new();
305 let mut start = IndexPos::new(0);
306
307 for &end in &split_points {
308 subranges.push(start..end);
309 start = end;
310 }
311
312 subranges.push(start..IndexPos::new(range_size / 8));
314
315 subranges.shuffle(&mut rng);
317
318 let mut result = Vec::new();
319
320 let (fill_cnt, dup_cnt, rand_cnt) = bucket_counts(num_buckets, percentages);
321
322 for (i, range) in subranges.into_iter().enumerate() {
323 if i < fill_cnt {
324 result.push((range, Bucket::Fill));
325 } else if i < fill_cnt + dup_cnt {
326 result.push((range, Bucket::Duplicate));
327 } else if i < fill_cnt + dup_cnt + rand_cnt {
328 result.push((range, Bucket::Random));
329 } else {
330 unreachable!("More subranges than expected");
332 }
333 }
334
335 result.sort_by_key(|(range, _)| range.start);
337
338 result
339}
340
341#[cfg(test)]
342mod tests {
343 use super::*;
344 use std::ops::Range;
345
346 fn total_bytes(range: &Range<IndexPos>) -> u64 {
347 (range.end - range.start) * 8
348 }
349
350 #[test]
351 fn test_fill_pattern() {
352 let (mut generator, segments) = DataMix::create(
353 6656,
354 12345,
355 1,
356 &PercentPattern {
357 fill: 100,
358 duplicates: 0,
359 random: 0,
360 },
361 );
362
363 assert_eq!(segments.len(), 1);
365 assert_eq!(segments[0].1, Bucket::Fill);
366
367 for byte_offset in (0..800).step_by(8) {
369 generator.setup(IndexPos::new(byte_offset));
370 let val = generator.next_u64();
371 assert_eq!(
372 val, 0,
373 "Fill pattern should return 0 at byte offset {}",
374 byte_offset
375 );
376 }
377 }
378
379 #[test]
380 fn test_duplicate_pattern_single_read() {
381 let (mut generator, segments) = DataMix::create(
382 4096,
383 12345,
384 1,
385 &PercentPattern {
386 fill: 0,
387 duplicates: 100,
388 random: 0,
389 },
390 );
391
392 assert_eq!(segments.len(), 1);
394 assert_eq!(segments[0].1, Bucket::Duplicate);
395
396 generator.setup(IndexPos::new(0));
403 for i in 0..256 {
404 let val = generator.next_u64();
405 let expected = (i % 64) as u64;
406 assert_eq!(val, expected, "Duplicate pattern at index {}", i);
407 }
408 }
409
410 #[test]
411 fn test_duplicate_pattern_with_offset() {
412 let (mut generator, _) = DataMix::create(
413 4096,
414 12345,
415 1,
416 &PercentPattern {
417 fill: 0,
418 duplicates: 100,
419 random: 0,
420 },
421 );
422
423 generator.setup(IndexPos::new(16));
426 let val = generator.next_u64();
427 assert_eq!(val, 16);
429
430 generator.setup(IndexPos::new(256));
432 let val = generator.next_u64();
433 assert_eq!(val, 0);
435
436 generator.setup(IndexPos::new(257));
438 let val = generator.next_u64();
439 assert_eq!(val, 1);
441 }
442
443 #[test]
444 fn test_duplicate_pattern_repeats() {
445 let (mut generator, _) = DataMix::create(
446 20480,
447 12345,
448 1,
449 &PercentPattern {
450 fill: 0,
451 duplicates: 100,
452 random: 0,
453 },
454 );
455
456 for index in 0..6 {
458 generator.setup(IndexPos::new(512 * index / 8));
460 let v = generator.next_u64();
461 assert_eq!(v, 0);
462 }
463 }
464
465 #[test]
466 fn test_random_pattern_deterministic() {
467 let seed = 42;
468 let (mut generator1, _) = DataMix::create(
469 6656,
470 seed,
471 1,
472 &PercentPattern {
473 fill: 0,
474 duplicates: 0,
475 random: 100,
476 },
477 );
478
479 let (mut generator2, _) = DataMix::create(
480 6656,
481 seed,
482 1,
483 &PercentPattern {
484 fill: 0,
485 duplicates: 0,
486 random: 100,
487 },
488 );
489
490 for byte_offset in (0..800).step_by(8) {
492 generator1.setup(IndexPos::new(byte_offset));
493 let val1 = generator1.next_u64();
494
495 generator2.setup(IndexPos::new(byte_offset));
496 let val2 = generator2.next_u64();
497
498 assert_eq!(
499 val1, val2,
500 "Random pattern should be deterministic at byte offset {}",
501 byte_offset
502 );
503 }
504 }
505
506 #[test]
507 fn test_random_pattern_different_seeds() {
508 let (mut generator1, _) = DataMix::create(
509 4096,
510 111,
511 1,
512 &PercentPattern {
513 fill: 0,
514 duplicates: 0,
515 random: 100,
516 },
517 );
518
519 let (mut generator2, _) = DataMix::create(
520 4096,
521 222,
522 1,
523 &PercentPattern {
524 fill: 0,
525 duplicates: 0,
526 random: 100,
527 },
528 );
529
530 let mut differences = 0;
532 let mut total = 0;
533 for byte_offset in (0..4096 / 8).step_by(8) {
534 generator1.setup(IndexPos::new(byte_offset));
535 let val1 = generator1.next_u64();
536
537 generator2.setup(IndexPos::new(byte_offset));
538 let val2 = generator2.next_u64();
539
540 if val1 != val2 {
541 differences += 1;
542 }
543 total += 1;
544 }
545
546 let percent_diff = differences as f64 / total as f64;
547
548 log::debug!("percent_diff = {percent_diff}");
549
550 assert!(
551 percent_diff > 0.9,
552 "Different seeds should produce mostly different values, got {} out of {total}",
553 differences
554 );
555 }
556
557 #[test]
558 fn test_mixed_pattern_segments() {
559 let (mut generator, segments) = DataMix::create(
560 1024 * 10,
561 12345,
562 10,
563 &PercentPattern {
564 fill: 40,
565 duplicates: 30,
566 random: 30,
567 },
568 );
569
570 assert_eq!(segments.len(), 10);
572
573 let fill_count = segments.iter().filter(|(_, b)| *b == Bucket::Fill).count();
575 let dup_count = segments
576 .iter()
577 .filter(|(_, b)| *b == Bucket::Duplicate)
578 .count();
579 let rand_count = segments
580 .iter()
581 .filter(|(_, b)| *b == Bucket::Random)
582 .count();
583
584 assert_eq!(fill_count, 4, "Should have 4 Fill segments (40%)");
586 assert_eq!(dup_count, 3, "Should have 3 Duplicate segments (30%)");
587 assert_eq!(rand_count, 3, "Should have 3 Random segments (30%)");
588
589 let mut last_end = IndexPos::new(0);
591 for (range, _) in &segments {
592 assert_eq!(range.start, last_end, "Segments should be contiguous");
593 assert!(range.end > range.start, "Segments should be non-empty");
594 last_end = range.end;
595 }
596 assert_eq!(
597 last_end,
598 IndexPos::new(1024 * 10 / 8),
599 "Segments should cover entire range"
600 );
601
602 for (range, bucket) in &segments {
604 generator.setup(range.start);
605 let val = generator.next_u64();
606
607 match bucket {
608 Bucket::Fill => {
609 assert_eq!(val, 0, "Fill segment at {} should return 0", range.start)
610 }
611 Bucket::Duplicate => {
612 let expected = range.start.as_u64() % 64;
613 assert_eq!(
614 val, expected,
615 "Duplicate segment at {} should match counter pattern",
616 range.start
617 );
618 }
619 Bucket::Random => {
620 }
622 Bucket::NotValid => panic!("this should not be seen"),
623 }
624 }
625 }
626
627 #[test]
628 fn test_setup_preserves_offset_position() {
629 let (mut generator, _) = DataMix::create(
630 4096,
631 12345,
632 1,
633 &PercentPattern {
634 fill: 0,
635 duplicates: 0,
636 random: 100,
637 },
638 );
639
640 generator.setup(IndexPos::new(100));
642 let val_100 = generator.next_u64();
643
644 for byte_offset in (200..400).step_by(8) {
646 generator.setup(IndexPos::new(byte_offset));
647 generator.next_u64();
648 }
649
650 generator.setup(IndexPos::new(100));
652 let val_100_again = generator.next_u64();
653
654 assert_eq!(
655 val_100, val_100_again,
656 "Same setup should produce same value"
657 );
658 }
659
660 #[test]
661 fn test_sequential_read_across_segments() {
662 let (mut generator, segments) = DataMix::create(
663 2048,
664 12345,
665 4,
666 &PercentPattern {
667 fill: 50,
668 duplicates: 25,
669 random: 25,
670 },
671 );
672
673 for offset_in_bytes in (0..2000).step_by(96) {
675 let index_position = IndexPos::new(offset_in_bytes / 8);
676
677 generator.setup(index_position);
678 let val = generator.next_u64();
679
680 let segment = segments
682 .iter()
683 .find(|(range, _)| range.contains(&index_position))
684 .unwrap_or_else(|| panic!("No segment for offset {}", index_position));
685
686 let (_range, bucket) = segment;
687
688 match bucket {
690 Bucket::Fill => assert_eq!(val, 0, "Fill at offset {}", index_position),
691 Bucket::Duplicate => {
692 let expected = index_position.as_u64() % 64;
693 assert_eq!(val, expected, "Duplicate at offset {}", index_position);
694 }
695 Bucket::Random => {}
696 Bucket::NotValid => panic!("Should not be seeing NotValid in normal operation"),
697 }
698 }
699 }
700
701 #[test]
702 fn test_continuous_sequential_read() {
703 let (mut generator, _) = DataMix::create(
705 1024,
706 12345,
707 1,
708 &PercentPattern {
709 fill: 0,
710 duplicates: 100,
711 random: 0,
712 },
713 );
714
715 generator.setup(IndexPos::new(0));
717
718 for i in 0..128 {
720 let val = generator.next_u64();
721 let expected = (i % 64) as u64;
722 assert_eq!(val, expected, "Continuous read at index {}", i);
723 }
724 }
725
726 #[test]
727 fn test_random_access_pattern() {
728 let (mut generator, _) = DataMix::create(
729 4096,
730 12345,
731 5,
732 &PercentPattern {
733 fill: 33,
734 duplicates: 33,
735 random: 34,
736 },
737 );
738
739 let test_offsets = vec![0, 800, 1600, 2400, 3200];
741
742 for &byte_offset in &test_offsets {
743 let index_offset = IndexPos::new(byte_offset / 8);
745 generator.setup(index_offset);
746 let val1 = generator.next_u64();
747
748 generator.setup(index_offset);
749 let val2 = generator.next_u64();
750
751 assert_eq!(
752 val1, val2,
753 "Same offset {} should give same value",
754 index_offset
755 );
756 }
757 }
758
759 #[test]
760 fn test_duplicate_pattern_boundary() {
761 let (mut generator, _) = DataMix::create(
762 4096,
763 12345,
764 1,
765 &PercentPattern {
766 fill: 0,
767 duplicates: 100,
768 random: 0,
769 },
770 );
771
772 generator.setup(IndexPos::new(254));
774 let val_62 = generator.next_u64();
775 assert_eq!(val_62, 62);
776
777 generator.setup(IndexPos::new(255));
778 let val_63 = generator.next_u64();
779 assert_eq!(val_63, 63);
780
781 generator.setup(IndexPos::new(256));
782 let val_256 = generator.next_u64();
783 assert_eq!(val_256, 0); generator.setup(IndexPos::new(257));
786 let val_257 = generator.next_u64();
787 assert_eq!(val_257, 1);
788 }
789
790 #[test]
791 fn test_segment_creation_coverage() {
792 for num_segments in [1, 5, 10, 50, 100] {
794 let size = 512 * 5000;
795 let (_generator, segments) = DataMix::create(
796 size,
797 12345,
798 num_segments,
799 &PercentPattern {
800 fill: 33,
801 duplicates: 33,
802 random: 34,
803 },
804 );
805
806 assert_eq!(segments.len(), num_segments);
807
808 let total_size: u64 = segments.iter().map(|(range, _)| total_bytes(range)).sum();
810 assert_eq!(total_size, size, "Segments must cover entire range");
811 }
812 }
813
814 #[test]
815 #[should_panic(expected = "Percentages must sum to 100")]
816 fn test_invalid_percentages() {
817 DataMix::create(
818 1024,
819 12345,
820 2,
821 &PercentPattern {
822 fill: 40,
823 duplicates: 40,
824 random: 40,
825 },
826 );
827 }
828
829 #[test]
830 fn test_segment_boundaries() {
831 let (mut generator, segments) = DataMix::create(
832 2048,
833 12345,
834 3,
835 &PercentPattern {
836 fill: 33,
837 duplicates: 33,
838 random: 34,
839 },
840 );
841
842 for (range, _) in &segments {
844 generator.setup(range.start);
845 let _val_start = generator.next_u64();
846
847 if range.end.as_u64() >= 8 {
848 let last_offset = range.end - IndexPos::new(8);
849 generator.setup(IndexPos::new(last_offset));
850 let _val_end = generator.next_u64();
851 }
852 }
853 }
854
855 #[test]
856 fn test_large_offset() {
857 let (mut generator, _) = DataMix::create(
858 100352,
859 12345,
860 1,
861 &PercentPattern {
862 fill: 0,
863 duplicates: 100,
864 random: 0,
865 },
866 );
867
868 for byte_offset in [0, 10000, 50000, 99000] {
870 let index_val = byte_offset / 8;
871 generator.setup(IndexPos::new(index_val));
872 let val = generator.next_u64();
873 let expected = index_val % 64;
874 assert_eq!(val, expected, "Duplicate pattern at offset {}", byte_offset);
875 }
876 }
877
878 #[test]
879 fn test_segment_transition() {
880 let (mut generator, segments) = DataMix::create(
881 2048,
882 12345,
883 4,
884 &PercentPattern {
885 fill: 50,
886 duplicates: 25,
887 random: 25,
888 },
889 );
890
891 for i in segments.iter().skip(1) {
893 let boundary = i.0.start;
894
895 if boundary.as_u64() >= 8 {
896 generator.setup(IndexPos::new(boundary - IndexPos::new(8)));
898 let _val_before = generator.next_u64();
899
900 generator.setup(boundary);
902 let _val_at = generator.next_u64();
903 }
904 }
905 }
906}