vortex_array/arrays/
dict_test.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4#![allow(clippy::unwrap_used)]
5
6use rand::Rng;
7use rand::SeedableRng;
8use rand::distr::Alphanumeric;
9use rand::distr::Distribution;
10use rand::distr::StandardUniform;
11use rand::prelude::IndexedRandom;
12use rand::prelude::StdRng;
13use vortex_buffer::Buffer;
14use vortex_dtype::NativePType;
15use vortex_error::VortexResult;
16use vortex_error::VortexUnwrap;
17
18use super::ChunkedArray;
19use super::DictArray;
20use super::PrimitiveArray;
21use crate::ArrayRef;
22use crate::IntoArray;
23use crate::validity::Validity;
24
25pub fn gen_primitive_for_dict<T: NativePType>(len: usize, unique_count: usize) -> PrimitiveArray
26where
27    StandardUniform: Distribution<T>,
28{
29    let mut rng = StdRng::seed_from_u64(0);
30    let values = (0..unique_count)
31        .map(|_| rng.random::<T>())
32        .collect::<Vec<T>>();
33    let data = (0..len)
34        .map(|_| *values.choose(&mut rng).unwrap())
35        .collect::<Buffer<_>>();
36    PrimitiveArray::new(data, Validity::NonNullable)
37}
38
39pub fn gen_primitive_dict<V: NativePType, C: NativePType>(
40    len: usize,
41    unique_count: usize,
42) -> VortexResult<DictArray>
43where
44    StandardUniform: Distribution<V>,
45{
46    let mut rng = StdRng::seed_from_u64(0);
47    let values = (0..unique_count)
48        .map(|_| rng.random::<V>())
49        .collect::<PrimitiveArray>();
50
51    let codes = (0..len)
52        .map(|_| C::from(rng.random_range(0..unique_count)).unwrap())
53        .collect::<PrimitiveArray>();
54
55    DictArray::try_new(codes.into_array(), values.into_array())
56}
57
58pub fn gen_varbin_words(len: usize, unique_count: usize) -> Vec<String> {
59    let rng = &mut StdRng::seed_from_u64(0);
60    let dict: Vec<String> = (0..unique_count)
61        .map(|_| {
62            rng.sample_iter(&Alphanumeric)
63                .take(16)
64                .map(char::from)
65                .collect()
66        })
67        .collect();
68
69    (0..len)
70        .map(|_| dict.choose(rng).unwrap().clone())
71        .collect()
72}
73
74pub fn gen_dict_primitive_chunks<T: NativePType, O: NativePType>(
75    len: usize,
76    unique_values: usize,
77    chunk_count: usize,
78) -> ArrayRef
79where
80    StandardUniform: Distribution<T>,
81{
82    (0..chunk_count)
83        .map(|_| {
84            gen_primitive_dict::<T, O>(len, unique_values)
85                .vortex_unwrap()
86                .into_array()
87        })
88        .collect::<ChunkedArray>()
89        .into_array()
90}