vortex_array/arrays/
dict_test.rs1#![allow(clippy::unwrap_used)]
5
6use rand::Rng;
7use rand::SeedableRng;
8use rand::distr::Alphanumeric;
9use rand::distr::Distribution;
10use rand::distr::StandardUniform;
11use rand::prelude::IndexedRandom;
12use rand::prelude::StdRng;
13use vortex_buffer::Buffer;
14use vortex_dtype::NativePType;
15use vortex_error::VortexResult;
16use vortex_error::VortexUnwrap;
17
18use super::ChunkedArray;
19use super::DictArray;
20use super::PrimitiveArray;
21use crate::ArrayRef;
22use crate::IntoArray;
23use crate::validity::Validity;
24
25pub fn gen_primitive_for_dict<T: NativePType>(len: usize, unique_count: usize) -> PrimitiveArray
26where
27 StandardUniform: Distribution<T>,
28{
29 let mut rng = StdRng::seed_from_u64(0);
30 let values = (0..unique_count)
31 .map(|_| rng.random::<T>())
32 .collect::<Vec<T>>();
33 let data = (0..len)
34 .map(|_| *values.choose(&mut rng).unwrap())
35 .collect::<Buffer<_>>();
36 PrimitiveArray::new(data, Validity::NonNullable)
37}
38
39pub fn gen_primitive_dict<V: NativePType, C: NativePType>(
40 len: usize,
41 unique_count: usize,
42) -> VortexResult<DictArray>
43where
44 StandardUniform: Distribution<V>,
45{
46 let mut rng = StdRng::seed_from_u64(0);
47 let values = (0..unique_count)
48 .map(|_| rng.random::<V>())
49 .collect::<PrimitiveArray>();
50
51 let codes = (0..len)
52 .map(|_| C::from(rng.random_range(0..unique_count)).unwrap())
53 .collect::<PrimitiveArray>();
54
55 DictArray::try_new(codes.into_array(), values.into_array())
56}
57
58pub fn gen_varbin_words(len: usize, unique_count: usize) -> Vec<String> {
59 let rng = &mut StdRng::seed_from_u64(0);
60 let dict: Vec<String> = (0..unique_count)
61 .map(|_| {
62 rng.sample_iter(&Alphanumeric)
63 .take(16)
64 .map(char::from)
65 .collect()
66 })
67 .collect();
68
69 (0..len)
70 .map(|_| dict.choose(rng).unwrap().clone())
71 .collect()
72}
73
74pub fn gen_dict_primitive_chunks<T: NativePType, O: NativePType>(
75 len: usize,
76 unique_values: usize,
77 chunk_count: usize,
78) -> ArrayRef
79where
80 StandardUniform: Distribution<T>,
81{
82 (0..chunk_count)
83 .map(|_| {
84 gen_primitive_dict::<T, O>(len, unique_values)
85 .vortex_unwrap()
86 .into_array()
87 })
88 .collect::<ChunkedArray>()
89 .into_array()
90}