vortex_array/arrays/
dict_test.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4#![allow(clippy::unwrap_used)]
5
6use rand::distr::{Alphanumeric, Distribution, StandardUniform};
7use rand::prelude::{IndexedRandom, StdRng};
8use rand::{Rng, SeedableRng};
9use vortex_buffer::Buffer;
10use vortex_dtype::NativePType;
11use vortex_error::{VortexResult, VortexUnwrap};
12
13use super::{ChunkedArray, DictArray, PrimitiveArray};
14use crate::validity::Validity;
15use crate::{ArrayRef, IntoArray};
16
17pub fn gen_primitive_for_dict<T: NativePType>(len: usize, unique_count: usize) -> PrimitiveArray
18where
19    StandardUniform: Distribution<T>,
20{
21    let mut rng = StdRng::seed_from_u64(0);
22    let values = (0..unique_count)
23        .map(|_| rng.random::<T>())
24        .collect::<Vec<T>>();
25    let data = (0..len)
26        .map(|_| *values.choose(&mut rng).unwrap())
27        .collect::<Buffer<_>>();
28    PrimitiveArray::new(data, Validity::NonNullable)
29}
30
31pub fn gen_primitive_dict<V: NativePType, C: NativePType>(
32    len: usize,
33    unique_count: usize,
34) -> VortexResult<DictArray>
35where
36    StandardUniform: Distribution<V>,
37{
38    let mut rng = StdRng::seed_from_u64(0);
39    let values = (0..unique_count)
40        .map(|_| rng.random::<V>())
41        .collect::<PrimitiveArray>();
42
43    let codes = (0..len)
44        .map(|_| C::from(rng.random_range(0..unique_count)).unwrap())
45        .collect::<PrimitiveArray>();
46
47    DictArray::try_new(codes.into_array(), values.into_array())
48}
49
50pub fn gen_varbin_words(len: usize, unique_count: usize) -> Vec<String> {
51    let rng = &mut StdRng::seed_from_u64(0);
52    let dict: Vec<String> = (0..unique_count)
53        .map(|_| {
54            rng.sample_iter(&Alphanumeric)
55                .take(16)
56                .map(char::from)
57                .collect()
58        })
59        .collect();
60
61    (0..len)
62        .map(|_| dict.choose(rng).unwrap().clone())
63        .collect()
64}
65
66pub fn gen_dict_primitive_chunks<T: NativePType, O: NativePType>(
67    len: usize,
68    unique_values: usize,
69    chunk_count: usize,
70) -> ArrayRef
71where
72    StandardUniform: Distribution<T>,
73{
74    (0..chunk_count)
75        .map(|_| {
76            gen_primitive_dict::<T, O>(len, unique_values)
77                .vortex_unwrap()
78                .into_array()
79        })
80        .collect::<ChunkedArray>()
81        .into_array()
82}