test_bd/
data_pattern.rs

1use rand::prelude::SliceRandom;
2use rand::Rng;
3use rand::{RngCore, SeedableRng};
4use rand_pcg::Mcg128Xsl64;
5use std::collections::BTreeSet;
6use std::rc::Rc;
7use std::sync::Mutex;
8
9use crate::position::*;
10
11struct RandWrap {
12    r: Mcg128Xsl64,
13    s: u64,
14}
15
16impl RandWrap {
17    fn new(seed: u64) -> Self {
18        Self {
19            r: Mcg128Xsl64::seed_from_u64(seed),
20            s: seed,
21        }
22    }
23
24    fn set_state_to(&mut self, offset: IndexPos) {
25        let u64_offset = offset.as_u64();
26        self.r = Mcg128Xsl64::seed_from_u64(self.s);
27        self.r.advance(u64_offset as u128);
28    }
29
30    fn next_u64(&mut self) -> u64 {
31        self.r.next_u64()
32    }
33}
34
35// Expectation is we set the offset and walk the number of blocks 8 bytes at a time
36pub(crate) trait PatternGenerator {
37    fn setup(&mut self, offset: IndexPos);
38    fn next_u64(&mut self) -> u64;
39}
40
41// Eventually we want an interface and different data types that implement that interface that
42// each have unique behavior we are looking for.
43pub(crate) struct TestBdState {
44    pub s: Rc<Mutex<Box<dyn PatternGenerator>>>,
45}
46
47impl std::clone::Clone for TestBdState {
48    fn clone(&self) -> Self {
49        Self { s: self.s.clone() }
50    }
51}
52
53pub(crate) struct PercentPattern {
54    pub fill: u32,
55    pub duplicates: u32,
56    pub random: u32,
57}
58
59pub(crate) struct DataMix {
60    mapping: Vec<(std::ops::Range<IndexPos>, Bucket)>,
61    r: RandWrap,
62    current: IndexPos,
63    start_range: IndexPos,
64    end_range: IndexPos,
65    cur_bucket: Bucket, // When we switch buckets we will need to reset the random offset
66    dupe_block: [IndexPos; 64],
67}
68
69type PatGen = Box<dyn PatternGenerator>;
70type Segments = Vec<(std::ops::Range<IndexPos>, Bucket)>;
71
72impl DataMix {
73    pub fn create(
74        size: u64,
75        seed: u64,
76        segments: usize,
77        percents: &PercentPattern,
78    ) -> (PatGen, Segments) {
79        assert!(size.is_multiple_of(8));
80
81        assert!(
82            percents.duplicates + percents.fill + percents.random == 100,
83            "Percentages must sum to 100"
84        );
85
86        let mapping = split_range_into_random_subranges_with_buckets(
87            seed,
88            size,
89            segments,
90            (percents.fill, percents.duplicates, percents.random),
91        );
92
93        let mut dupe_block: [IndexPos; 64] = [IndexPos::new(0); 64];
94        for (idx, d) in dupe_block.iter_mut().enumerate() {
95            *d = IndexPos::new(idx as u64);
96        }
97
98        let mapping_copy = mapping.clone();
99
100        (
101            Box::new(Self {
102                mapping,
103                r: RandWrap::new(seed),
104                current: IndexPos::new(u64::MAX),
105                start_range: IndexPos::new(u64::MAX), // Use sentinel value to ensure first setup() always initializes
106                end_range: IndexPos::new(0),
107                cur_bucket: Bucket::NotValid,
108                dupe_block,
109            }),
110            mapping_copy,
111        )
112    }
113
114    fn find_segment(&self, offset: IndexPos) -> Option<(std::ops::Range<IndexPos>, Bucket)> {
115        let index = match self
116            .mapping
117            .binary_search_by_key(&offset, |(start, _)| start.start)
118        {
119            Ok(i) => i,
120            Err(i) => i,
121        };
122
123        for i in [index, index.wrapping_sub(1)] {
124            if let Some((r, bucket)) = self.mapping.get(i) {
125                if r.start <= offset && offset < r.end {
126                    return Some((r.clone(), *bucket));
127                }
128            }
129        }
130        None
131    }
132}
133
134impl PatternGenerator for DataMix {
135    fn setup(&mut self, offset: IndexPos) {
136        // If we are processing sequentially, we could already be in the correct spot
137        // Is this offset in the range we can currently handle?
138
139        if !(self.start_range <= offset && offset < self.end_range) {
140            let (range, bucket) = self.find_segment(offset).unwrap();
141
142            assert!(range.start <= offset && offset < range.end);
143
144            self.start_range = range.start;
145            self.end_range = range.end;
146            self.cur_bucket = bucket;
147        }
148
149        // If we're calling setup, we need to ensure that the random state is correct too,
150        // regardless if we have changed segments
151        self.r.set_state_to(offset);
152
153        // Regardless of where we are in a range, set the current location to offset.  Then
154        // as we call next we will bump this so we always know internally where we are
155        self.current = offset;
156    }
157
158    fn next_u64(&mut self) -> u64 {
159        // Check to see if we passed a segment boundary
160        if self.current >= self.end_range {
161            self.setup(self.current);
162        }
163
164        let v = match self.cur_bucket {
165            Bucket::Fill => 0, //TODO: Make configurable?
166            Bucket::Random => self.r.next_u64(),
167            Bucket::Duplicate => self.dupe_block[(self.current.as_u64() % 64) as usize].as_u64(),
168            Bucket::NotValid => panic!("Not a valid bucket"),
169        };
170
171        self.current += IndexPos::new(1);
172        v
173    }
174}
175
176use serde::{Deserialize, Serialize};
177
178/// Represents the type of data pattern used in a block device segment.
179///
180/// Each segment of the test block device can use a different pattern type,
181/// allowing for testing of various data scenarios including compression,
182/// deduplication, and encryption effectiveness.
183///
184/// # Examples
185///
186/// ```
187/// use test_bd::Bucket;
188///
189/// let pattern = Bucket::Fill;
190/// assert_eq!(pattern, Bucket::Fill);
191/// ```
192#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
193pub enum Bucket {
194    /// All data is filled with zeros (or a constant pattern).
195    ///
196    /// This pattern is useful for testing compression efficiency, as it
197    /// produces highly compressible data.
198    Fill,
199
200    /// Data follows a repeating pattern of 64 sequential values (0, 1, 2, ..., 63).
201    ///
202    /// This pattern is ideal for testing deduplication effectiveness, as the
203    /// same 512-byte blocks (64 * 8 bytes) repeat throughout the segment.
204    Duplicate,
205
206    /// Data is generated using a deterministic pseudo-random number generator.
207    ///
208    /// This pattern produces data that appears random but is reproducible using
209    /// the same seed. It's useful for testing scenarios where data should not
210    /// compress or deduplicate well, such as encrypted or already-compressed data.
211    Random,
212
213    /// Internal sentinel value indicating an uninitialized or invalid bucket.
214    ///
215    /// This variant should not appear in normal usage and indicates a programming error
216    /// if encountered during runtime.
217    NotValid,
218}
219
220fn bucket_counts(
221    num_buckets: usize,
222    (p_fill, p_dup, p_rand): (u32, u32, u32),
223) -> (usize, usize, usize) {
224    assert_eq!(p_fill + p_dup + p_rand, 100);
225
226    let weights = [
227        ("fill", p_fill as f64),
228        ("dup", p_dup as f64),
229        ("rand", p_rand as f64),
230    ];
231
232    // Ideal fractional counts
233    let ideal: Vec<(usize, f64, f64)> = weights
234        .iter()
235        .enumerate()
236        .map(|(idx, (_, w))| {
237            let v = *w * num_buckets as f64 / 100.0;
238            (idx, v.floor(), v.fract())
239        })
240        .collect();
241
242    let mut counts: [usize; 3] = [0; 3];
243    let mut used = 0usize;
244
245    for (idx, floor_val, _) in &ideal {
246        let c = *floor_val as usize;
247        counts[*idx] = c;
248        used += c;
249    }
250
251    let mut remaining = num_buckets - used;
252
253    // Give leftovers to the largest fractional parts
254    let mut frac_sorted = ideal.clone();
255    frac_sorted.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap());
256
257    for (idx, _, _) in frac_sorted {
258        if remaining == 0 {
259            break;
260        }
261        counts[idx] += 1;
262        remaining -= 1;
263    }
264
265    (counts[0], counts[1], counts[2])
266}
267
268// Units are in bytes
269fn split_range_into_random_subranges_with_buckets(
270    seed: u64,
271    range_size: u64,
272    num_buckets: usize,
273    percentages: (u32, u32, u32),
274) -> Vec<(std::ops::Range<IndexPos>, Bucket)> {
275    assert!(
276        range_size.is_multiple_of(8),
277        "total block device must be a multiple of 8"
278    );
279
280    assert!(
281        num_buckets as u64 <= (range_size / 512),
282        "Cannot split into more subranges than elements in the range"
283    );
284    assert!(
285        percentages.0 + percentages.1 + percentages.2 == 100,
286        "Percentages must sum to 100"
287    );
288
289    let mut rng = Mcg128Xsl64::seed_from_u64(seed);
290    let mut split_points = BTreeSet::new();
291
292    // Generate n-1 unique split points (ensure they are on 8 byte boundaries)
293    let lo = 512 / 8;
294    let hi = range_size / 8;
295    while split_points.len() < num_buckets - 1 {
296        let point = IndexPos::new(rng.random_range(lo..hi));
297        split_points.insert(point);
298    }
299
300    // Collect, the points will be sorted from the BTreeSet
301    let split_points: Vec<IndexPos> = split_points.into_iter().collect();
302
303    // Create subranges from the sorted split points
304    let mut subranges: Vec<std::ops::Range<IndexPos>> = Vec::new();
305    let mut start = IndexPos::new(0);
306
307    for &end in &split_points {
308        subranges.push(start..end);
309        start = end;
310    }
311
312    // Add the last range
313    subranges.push(start..IndexPos::new(range_size / 8));
314
315    // Shuffle and distribute subranges into buckets based on percentages
316    subranges.shuffle(&mut rng);
317
318    let mut result = Vec::new();
319
320    let (fill_cnt, dup_cnt, rand_cnt) = bucket_counts(num_buckets, percentages);
321
322    for (i, range) in subranges.into_iter().enumerate() {
323        if i < fill_cnt {
324            result.push((range, Bucket::Fill));
325        } else if i < fill_cnt + dup_cnt {
326            result.push((range, Bucket::Duplicate));
327        } else if i < fill_cnt + dup_cnt + rand_cnt {
328            result.push((range, Bucket::Random));
329        } else {
330            // Should never happen if bucket_counts is correct
331            unreachable!("More subranges than expected");
332        }
333    }
334
335    // Sort ranges by their start values to maintain order
336    result.sort_by_key(|(range, _)| range.start);
337
338    result
339}
340
341#[cfg(test)]
342mod tests {
343    use super::*;
344    use std::ops::Range;
345
346    fn total_bytes(range: &Range<IndexPos>) -> u64 {
347        (range.end - range.start) * 8
348    }
349
350    #[test]
351    fn test_fill_pattern() {
352        let (mut generator, segments) = DataMix::create(
353            6656,
354            12345,
355            1,
356            &PercentPattern {
357                fill: 100,
358                duplicates: 0,
359                random: 0,
360            },
361        );
362
363        // Verify we have one segment that's Fill type
364        assert_eq!(segments.len(), 1);
365        assert_eq!(segments[0].1, Bucket::Fill);
366
367        // Test at various byte offsets
368        for byte_offset in (0..800).step_by(8) {
369            generator.setup(IndexPos::new(byte_offset));
370            let val = generator.next_u64();
371            assert_eq!(
372                val, 0,
373                "Fill pattern should return 0 at byte offset {}",
374                byte_offset
375            );
376        }
377    }
378
379    #[test]
380    fn test_duplicate_pattern_single_read() {
381        let (mut generator, segments) = DataMix::create(
382            4096,
383            12345,
384            1,
385            &PercentPattern {
386                fill: 0,
387                duplicates: 100,
388                random: 0,
389            },
390        );
391
392        // Verify we have one segment that's Duplicate type
393        assert_eq!(segments.len(), 1);
394        assert_eq!(segments[0].1, Bucket::Duplicate);
395
396        // The duplicate pattern is 0, 1, 2, ..., 255, 0, 1, 2, ... (repeating)
397        // When we setup() at a byte offset, the internal counter is set to that byte offset
398        // Then each next_u64() call increments the counter and returns dupe_block[counter % 512]
399        // dupe_block[i] = i % 256
400
401        // Test setup at byte offset 0, then read consecutive values
402        generator.setup(IndexPos::new(0));
403        for i in 0..256 {
404            let val = generator.next_u64();
405            let expected = (i % 64) as u64;
406            assert_eq!(val, expected, "Duplicate pattern at index {}", i);
407        }
408    }
409
410    #[test]
411    fn test_duplicate_pattern_with_offset() {
412        let (mut generator, _) = DataMix::create(
413            4096,
414            12345,
415            1,
416            &PercentPattern {
417                fill: 0,
418                duplicates: 100,
419                random: 0,
420            },
421        );
422
423        // When setup(byte_offset) is called, it sets the internal counter to byte_offset
424        // Test at byte offset 16 (which means counter starts at 16)
425        generator.setup(IndexPos::new(16));
426        let val = generator.next_u64();
427        // counter is 16, so we get dupe_block[16 % 512] = dupe_block[16] = 16 % 256 = 16
428        assert_eq!(val, 16);
429
430        // Test at byte offset 256
431        generator.setup(IndexPos::new(256));
432        let val = generator.next_u64();
433        // counter is 256, so we get dupe_block[256 % 512] = dupe_block[256] = 256 % 256 = 0
434        assert_eq!(val, 0);
435
436        // Test at byte offset 257
437        generator.setup(IndexPos::new(257));
438        let val = generator.next_u64();
439        // counter is 257, so we get dupe_block[257 % 512] = dupe_block[257] = 257 % 256 = 1
440        assert_eq!(val, 1);
441    }
442
443    #[test]
444    fn test_duplicate_pattern_repeats() {
445        let (mut generator, _) = DataMix::create(
446            20480,
447            12345,
448            1,
449            &PercentPattern {
450                fill: 0,
451                duplicates: 100,
452                random: 0,
453            },
454        );
455
456        // The pattern repeats every 64 values
457        for index in 0..6 {
458            // Multiple of 512 work, convert from bytes to indexes
459            generator.setup(IndexPos::new(512 * index / 8));
460            let v = generator.next_u64();
461            assert_eq!(v, 0);
462        }
463    }
464
465    #[test]
466    fn test_random_pattern_deterministic() {
467        let seed = 42;
468        let (mut generator1, _) = DataMix::create(
469            6656,
470            seed,
471            1,
472            &PercentPattern {
473                fill: 0,
474                duplicates: 0,
475                random: 100,
476            },
477        );
478
479        let (mut generator2, _) = DataMix::create(
480            6656,
481            seed,
482            1,
483            &PercentPattern {
484                fill: 0,
485                duplicates: 0,
486                random: 100,
487            },
488        );
489
490        // Both generators with same seed should produce identical sequences
491        for byte_offset in (0..800).step_by(8) {
492            generator1.setup(IndexPos::new(byte_offset));
493            let val1 = generator1.next_u64();
494
495            generator2.setup(IndexPos::new(byte_offset));
496            let val2 = generator2.next_u64();
497
498            assert_eq!(
499                val1, val2,
500                "Random pattern should be deterministic at byte offset {}",
501                byte_offset
502            );
503        }
504    }
505
506    #[test]
507    fn test_random_pattern_different_seeds() {
508        let (mut generator1, _) = DataMix::create(
509            4096,
510            111,
511            1,
512            &PercentPattern {
513                fill: 0,
514                duplicates: 0,
515                random: 100,
516            },
517        );
518
519        let (mut generator2, _) = DataMix::create(
520            4096,
521            222,
522            1,
523            &PercentPattern {
524                fill: 0,
525                duplicates: 0,
526                random: 100,
527            },
528        );
529
530        // Different seeds should produce different sequences
531        let mut differences = 0;
532        let mut total = 0;
533        for byte_offset in (0..4096 / 8).step_by(8) {
534            generator1.setup(IndexPos::new(byte_offset));
535            let val1 = generator1.next_u64();
536
537            generator2.setup(IndexPos::new(byte_offset));
538            let val2 = generator2.next_u64();
539
540            if val1 != val2 {
541                differences += 1;
542            }
543            total += 1;
544        }
545
546        let percent_diff = differences as f64 / total as f64;
547
548        log::debug!("percent_diff = {percent_diff}");
549
550        assert!(
551            percent_diff > 0.9,
552            "Different seeds should produce mostly different values, got {} out of {total}",
553            differences
554        );
555    }
556
557    #[test]
558    fn test_mixed_pattern_segments() {
559        let (mut generator, segments) = DataMix::create(
560            1024 * 10,
561            12345,
562            10,
563            &PercentPattern {
564                fill: 40,
565                duplicates: 30,
566                random: 30,
567            },
568        );
569
570        // Verify we have 10 segments
571        assert_eq!(segments.len(), 10);
572
573        // Count the bucket types
574        let fill_count = segments.iter().filter(|(_, b)| *b == Bucket::Fill).count();
575        let dup_count = segments
576            .iter()
577            .filter(|(_, b)| *b == Bucket::Duplicate)
578            .count();
579        let rand_count = segments
580            .iter()
581            .filter(|(_, b)| *b == Bucket::Random)
582            .count();
583
584        // Verify distribution matches percentages
585        assert_eq!(fill_count, 4, "Should have 4 Fill segments (40%)");
586        assert_eq!(dup_count, 3, "Should have 3 Duplicate segments (30%)");
587        assert_eq!(rand_count, 3, "Should have 3 Random segments (30%)");
588
589        // Verify segments are non-overlapping and cover entire range
590        let mut last_end = IndexPos::new(0);
591        for (range, _) in &segments {
592            assert_eq!(range.start, last_end, "Segments should be contiguous");
593            assert!(range.end > range.start, "Segments should be non-empty");
594            last_end = range.end;
595        }
596        assert_eq!(
597            last_end,
598            IndexPos::new(1024 * 10 / 8),
599            "Segments should cover entire range"
600        );
601
602        // Test reading from each segment
603        for (range, bucket) in &segments {
604            generator.setup(range.start);
605            let val = generator.next_u64();
606
607            match bucket {
608                Bucket::Fill => {
609                    assert_eq!(val, 0, "Fill segment at {} should return 0", range.start)
610                }
611                Bucket::Duplicate => {
612                    let expected = range.start.as_u64() % 64;
613                    assert_eq!(
614                        val, expected,
615                        "Duplicate segment at {} should match counter pattern",
616                        range.start
617                    );
618                }
619                Bucket::Random => {
620                    // Just verify it doesn't panic
621                }
622                Bucket::NotValid => panic!("this should not be seen"),
623            }
624        }
625    }
626
627    #[test]
628    fn test_setup_preserves_offset_position() {
629        let (mut generator, _) = DataMix::create(
630            4096,
631            12345,
632            1,
633            &PercentPattern {
634                fill: 0,
635                duplicates: 0,
636                random: 100,
637            },
638        );
639
640        // Read value from byte offset 100
641        generator.setup(IndexPos::new(100));
642        let val_100 = generator.next_u64();
643
644        // Read from other offsets
645        for byte_offset in (200..400).step_by(8) {
646            generator.setup(IndexPos::new(byte_offset));
647            generator.next_u64();
648        }
649
650        // Setup at byte offset 100 again
651        generator.setup(IndexPos::new(100));
652        let val_100_again = generator.next_u64();
653
654        assert_eq!(
655            val_100, val_100_again,
656            "Same setup should produce same value"
657        );
658    }
659
660    #[test]
661    fn test_sequential_read_across_segments() {
662        let (mut generator, segments) = DataMix::create(
663            2048,
664            12345,
665            4,
666            &PercentPattern {
667                fill: 50,
668                duplicates: 25,
669                random: 25,
670            },
671        );
672
673        // Test reading from various byte offsets
674        for offset_in_bytes in (0..2000).step_by(96) {
675            let index_position = IndexPos::new(offset_in_bytes / 8);
676
677            generator.setup(index_position);
678            let val = generator.next_u64();
679
680            // Find which segment we're in
681            let segment = segments
682                .iter()
683                .find(|(range, _)| range.contains(&index_position))
684                .unwrap_or_else(|| panic!("No segment for offset {}", index_position));
685
686            let (_range, bucket) = segment;
687
688            // Verify the value matches the pattern
689            match bucket {
690                Bucket::Fill => assert_eq!(val, 0, "Fill at offset {}", index_position),
691                Bucket::Duplicate => {
692                    let expected = index_position.as_u64() % 64;
693                    assert_eq!(val, expected, "Duplicate at offset {}", index_position);
694                }
695                Bucket::Random => {}
696                Bucket::NotValid => panic!("Should not be seeing NotValid in normal operation"),
697            }
698        }
699    }
700
701    #[test]
702    fn test_continuous_sequential_read() {
703        // Test reading continuously without setup between reads
704        let (mut generator, _) = DataMix::create(
705            1024,
706            12345,
707            1,
708            &PercentPattern {
709                fill: 0,
710                duplicates: 100,
711                random: 0,
712            },
713        );
714
715        // Setup at 0 and read consecutive u64s
716        generator.setup(IndexPos::new(0));
717
718        // Read 128 consecutive u64s
719        for i in 0..128 {
720            let val = generator.next_u64();
721            let expected = (i % 64) as u64;
722            assert_eq!(val, expected, "Continuous read at index {}", i);
723        }
724    }
725
726    #[test]
727    fn test_random_access_pattern() {
728        let (mut generator, _) = DataMix::create(
729            4096,
730            12345,
731            5,
732            &PercentPattern {
733                fill: 33,
734                duplicates: 33,
735                random: 34,
736            },
737        );
738
739        // Test random access
740        let test_offsets = vec![0, 800, 1600, 2400, 3200];
741
742        for &byte_offset in &test_offsets {
743            // The test offsets are in bytes, not indexes
744            let index_offset = IndexPos::new(byte_offset / 8);
745            generator.setup(index_offset);
746            let val1 = generator.next_u64();
747
748            generator.setup(index_offset);
749            let val2 = generator.next_u64();
750
751            assert_eq!(
752                val1, val2,
753                "Same offset {} should give same value",
754                index_offset
755            );
756        }
757    }
758
759    #[test]
760    fn test_duplicate_pattern_boundary() {
761        let (mut generator, _) = DataMix::create(
762            4096,
763            12345,
764            1,
765            &PercentPattern {
766                fill: 0,
767                duplicates: 100,
768                random: 0,
769            },
770        );
771
772        // Test around the 256-value boundary
773        generator.setup(IndexPos::new(254));
774        let val_62 = generator.next_u64();
775        assert_eq!(val_62, 62);
776
777        generator.setup(IndexPos::new(255));
778        let val_63 = generator.next_u64();
779        assert_eq!(val_63, 63);
780
781        generator.setup(IndexPos::new(256));
782        let val_256 = generator.next_u64();
783        assert_eq!(val_256, 0); // Should wrap around
784
785        generator.setup(IndexPos::new(257));
786        let val_257 = generator.next_u64();
787        assert_eq!(val_257, 1);
788    }
789
790    #[test]
791    fn test_segment_creation_coverage() {
792        // Test various segment counts
793        for num_segments in [1, 5, 10, 50, 100] {
794            let size = 512 * 5000;
795            let (_generator, segments) = DataMix::create(
796                size,
797                12345,
798                num_segments,
799                &PercentPattern {
800                    fill: 33,
801                    duplicates: 33,
802                    random: 34,
803                },
804            );
805
806            assert_eq!(segments.len(), num_segments);
807
808            // Verify complete coverage
809            let total_size: u64 = segments.iter().map(|(range, _)| total_bytes(range)).sum();
810            assert_eq!(total_size, size, "Segments must cover entire range");
811        }
812    }
813
814    #[test]
815    #[should_panic(expected = "Percentages must sum to 100")]
816    fn test_invalid_percentages() {
817        DataMix::create(
818            1024,
819            12345,
820            2,
821            &PercentPattern {
822                fill: 40,
823                duplicates: 40,
824                random: 40,
825            },
826        );
827    }
828
829    #[test]
830    fn test_segment_boundaries() {
831        let (mut generator, segments) = DataMix::create(
832            2048,
833            12345,
834            3,
835            &PercentPattern {
836                fill: 33,
837                duplicates: 33,
838                random: 34,
839            },
840        );
841
842        // Test reading at segment boundaries
843        for (range, _) in &segments {
844            generator.setup(range.start);
845            let _val_start = generator.next_u64();
846
847            if range.end.as_u64() >= 8 {
848                let last_offset = range.end - IndexPos::new(8);
849                generator.setup(IndexPos::new(last_offset));
850                let _val_end = generator.next_u64();
851            }
852        }
853    }
854
855    #[test]
856    fn test_large_offset() {
857        let (mut generator, _) = DataMix::create(
858            100352,
859            12345,
860            1,
861            &PercentPattern {
862                fill: 0,
863                duplicates: 100,
864                random: 0,
865            },
866        );
867
868        // Test with large offsets
869        for byte_offset in [0, 10000, 50000, 99000] {
870            let index_val = byte_offset / 8;
871            generator.setup(IndexPos::new(index_val));
872            let val = generator.next_u64();
873            let expected = index_val % 64;
874            assert_eq!(val, expected, "Duplicate pattern at offset {}", byte_offset);
875        }
876    }
877
878    #[test]
879    fn test_segment_transition() {
880        let (mut generator, segments) = DataMix::create(
881            2048,
882            12345,
883            4,
884            &PercentPattern {
885                fill: 50,
886                duplicates: 25,
887                random: 25,
888            },
889        );
890
891        // Test reading across segment boundaries
892        for i in segments.iter().skip(1) {
893            let boundary = i.0.start;
894
895            if boundary.as_u64() >= 8 {
896                // Read before boundary
897                generator.setup(IndexPos::new(boundary - IndexPos::new(8)));
898                let _val_before = generator.next_u64();
899
900                // Read at boundary
901                generator.setup(boundary);
902                let _val_at = generator.next_u64();
903            }
904        }
905    }
906}