Skip to main content

xet_client/chunk_cache/disk/
test_utils.rs

1use std::path::Path;
2
3use rand::rngs::{StdRng, ThreadRng};
4use rand::seq::SliceRandom;
5use rand::{Rng, RngExt, SeedableRng, rng};
6use xet_core_structures::merklehash::MerkleHash;
7
8use crate::cas_types::{ChunkRange, Key};
9
/// Default number of data bytes generated per entry in test builds (16 KiB).
#[cfg(test)]
pub const RANGE_LEN: u32 = 16 * 1024;
/// Default number of data bytes generated per entry in non-test builds (8 MiB).
#[cfg(not(test))]
pub const RANGE_LEN: u32 = 8 * 1024 * 1024;
14
/// Recursively prints every path found beneath `path` to stdout.
///
/// Each entry is printed before its own children. Failures to read a directory or one of its
/// entries are reported on stderr and do not abort the walk.
pub fn print_directory_contents(path: &Path) {
    let entries = match std::fs::read_dir(path) {
        Ok(entries) => entries,
        Err(e) => {
            eprintln!("Error reading directory: {e}");
            return;
        },
    };

    for entry in entries {
        let child = match entry {
            Ok(entry) => entry.path(),
            Err(e) => {
                eprintln!("Error reading entry: {e}");
                continue;
            },
        };

        println!("{}", child.display());

        // Descend into sub-directories after printing the directory itself.
        if child.is_dir() {
            print_directory_contents(&child);
        }
    }
}
38
39pub fn random_key(rng: &mut impl Rng) -> Key {
40    Key {
41        prefix: "default".to_string(),
42        hash: MerkleHash::from_slice(&rng.random::<[u8; 32]>()).unwrap(),
43    }
44}
45
46pub fn random_range(rng: &mut impl Rng) -> ChunkRange {
47    let start = rng.random::<u32>() % 1000;
48    let end = start + 1 + rng.random::<u32>() % (1024 - start);
49    ChunkRange::new(start, end)
50}
51
52pub fn random_bytes(rng: &mut impl Rng, range: &ChunkRange, len: u32) -> (Vec<u32>, Vec<u8>) {
53    let random_vec: Vec<u8> = (0..len).map(|_| rng.random()).collect();
54    if range.end - range.start == 0 {
55        return (vec![0, len], random_vec);
56    }
57
58    let mut offsets = Vec::with_capacity((range.end - range.start + 1) as usize);
59    offsets.push(0);
60    let mut candidates: Vec<u32> = (1..len).collect();
61    candidates.shuffle(rng);
62    candidates
63        .into_iter()
64        .take((range.end - range.start - 1) as usize)
65        .for_each(|v| offsets.push(v));
66    offsets.sort();
67    offsets.push(len);
68
69    (offsets.to_vec(), random_vec)
70}
71
72#[derive(Debug)]
73pub struct RandomEntryIterator<T: Rng> {
74    rng: T,
75    range_len: u32,
76    one_chunk_ranges: bool,
77}
78
79impl<T: Rng> RandomEntryIterator<T> {
80    pub fn new(rng: T) -> Self {
81        Self {
82            rng,
83            range_len: RANGE_LEN,
84            one_chunk_ranges: false,
85        }
86    }
87
88    pub fn with_range_len(mut self, len: u32) -> Self {
89        self.range_len = len;
90        self
91    }
92
93    // default is false, only use to set to true
94    pub fn with_one_chunk_ranges(mut self, one_chunk_ranges: bool) -> Self {
95        self.one_chunk_ranges = one_chunk_ranges;
96        self
97    }
98
99    pub fn next_key_range(&mut self) -> (Key, ChunkRange) {
100        (random_key(&mut self.rng), random_range(&mut self.rng))
101    }
102}
103
104impl<T: SeedableRng + Rng> RandomEntryIterator<T> {
105    pub fn from_seed(seed: u64) -> Self {
106        Self::new(T::seed_from_u64(seed))
107    }
108}
109
110impl RandomEntryIterator<StdRng> {
111    pub fn std_from_seed(seed: u64) -> Self {
112        Self::from_seed(seed)
113    }
114}
115
116impl Default for RandomEntryIterator<ThreadRng> {
117    fn default() -> Self {
118        Self::new(rng())
119    }
120}
121
122impl<T: Rng> Iterator for RandomEntryIterator<T> {
123    type Item = (Key, ChunkRange, Vec<u32>, Vec<u8>);
124
125    fn next(&mut self) -> Option<Self::Item> {
126        let key = random_key(&mut self.rng);
127        let range = if self.one_chunk_ranges {
128            let start = self.rng.random();
129            ChunkRange::new(start, start + 1)
130        } else {
131            random_range(&mut self.rng)
132        };
133        let (offsets, data) = random_bytes(&mut self.rng, &range, self.range_len);
134        Some((key, range, offsets, data))
135    }
136}
137
#[cfg(test)]
mod tests {
    use rand::rngs::StdRng;

    use super::RandomEntryIterator;

    /// Every generated entry has a non-empty range and offsets that frame the data exactly.
    #[test]
    fn test_iter() {
        let it = RandomEntryIterator::default();
        for (_key, range, chunk_byte_indices, data) in it.take(100) {
            assert!(range.start < range.end, "invalid range: {range:?}");

            // One offset per chunk boundary: the number of chunks plus one.
            let expected_len = (range.end - range.start + 1) as usize;
            assert_eq!(
                chunk_byte_indices.len(),
                expected_len,
                "chunk_byte_indices len mismatch, range: {range:?}, cbi len: {}",
                chunk_byte_indices.len()
            );

            let first = chunk_byte_indices[0];
            assert_eq!(first, 0, "chunk_byte_indices[0] != 0, is instead {}", first);

            let last = *chunk_byte_indices.last().unwrap();
            assert_eq!(
                last as usize,
                data.len(),
                "chunk_byte_indices last value does not equal data.len() ({}), is instead {}",
                data.len(),
                last
            );
        }
    }

    /// Two iterators built from the same seed yield identical sequences.
    #[test]
    fn test_iter_with_seed() {
        const SEED: u64 = 500555;
        let mut it1 = RandomEntryIterator::<StdRng>::from_seed(SEED);
        let mut it2 = RandomEntryIterator::<StdRng>::from_seed(SEED);

        // Full entries first: `take(10)` stops both iterators after exactly ten draws each.
        for (v1, v2) in it1.by_ref().zip(it2.by_ref()).take(10) {
            assert_eq!(v1, v2);
        }

        // The generators must still be in lock-step for key/range-only draws.
        for _ in 0..10 {
            assert_eq!(it1.next_key_range(), it2.next_key_range());
        }
    }
}
185}