vortex_array/arrays/
arbitrary.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::iter;
5use std::sync::Arc;
6
7use arbitrary::Arbitrary;
8use arbitrary::Result;
9use arbitrary::Unstructured;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::Buffer;
12use vortex_dtype::DType;
13use vortex_dtype::IntegerPType;
14use vortex_dtype::NativePType;
15use vortex_dtype::Nullability;
16use vortex_dtype::PType;
17use vortex_dtype::match_each_decimal_value_type;
18use vortex_error::VortexExpect;
19use vortex_error::VortexUnwrap;
20use vortex_scalar::Scalar;
21use vortex_scalar::arbitrary::random_scalar;
22
23use super::BoolArray;
24use super::ChunkedArray;
25use super::NullArray;
26use super::PrimitiveArray;
27use super::StructArray;
28use crate::Array;
29use crate::ArrayRef;
30use crate::IntoArray;
31use crate::ToCanonical;
32use crate::arrays::VarBinArray;
33use crate::arrays::VarBinViewArray;
34use crate::builders::ArrayBuilder;
35use crate::builders::DecimalBuilder;
36use crate::builders::FixedSizeListBuilder;
37use crate::builders::ListViewBuilder;
38use crate::validity::Validity;
39
40/// A wrapper type to implement `Arbitrary` for `ArrayRef`.
41#[derive(Clone, Debug)]
42pub struct ArbitraryArray(pub ArrayRef);
43
44impl<'a> Arbitrary<'a> for ArbitraryArray {
45    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
46        let dtype = u.arbitrary()?;
47        Self::arbitrary_with(u, None, &dtype)
48    }
49}
50
51impl ArbitraryArray {
52    pub fn arbitrary_with(u: &mut Unstructured, len: Option<usize>, dtype: &DType) -> Result<Self> {
53        random_array(u, dtype, len).map(ArbitraryArray)
54    }
55}
56
/// Splits `n` into `parts` sizes that sum to `n` and differ by at most one.
///
/// The first `parts - n % parts` entries are `n / parts`; the remaining
/// `n % parts` entries are one larger.
///
/// # Panics
///
/// Panics if `parts` is zero (remainder by zero).
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    let remainder = n % parts;
    let base = n / parts;

    let mut sizes = Vec::new();
    // Smaller parts come first, then the parts that absorb the remainder.
    sizes.extend(iter::repeat_n(base, parts - remainder));
    sizes.extend(iter::repeat_n(base + 1, remainder));
    sizes
}
64
65/// Creates a random array with a random number of chunks.
66fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
67    let num_chunks = u.int_in_range(1..=3)?;
68    let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
69    let mut chunks = (0..num_chunks)
70        .map(|i| {
71            let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
72            random_array_chunk(u, dtype, chunk_len)
73        })
74        .collect::<Result<Vec<_>>>()?;
75
76    if chunks.len() == 1 {
77        Ok(chunks.remove(0))
78    } else {
79        let dtype = chunks[0].dtype().clone();
80        Ok(ChunkedArray::try_new(chunks, dtype)
81            .vortex_unwrap()
82            .into_array())
83    }
84}
85
86/// Creates a random array chunk.
87fn random_array_chunk(
88    u: &mut Unstructured<'_>,
89    dtype: &DType,
90    chunk_len: Option<usize>,
91) -> Result<ArrayRef> {
92    match dtype {
93        DType::Null => Ok(NullArray::new(
94            chunk_len
95                .map(Ok)
96                .unwrap_or_else(|| u.int_in_range(0..=100))?,
97        )
98        .into_array()),
99        DType::Bool(n) => random_bool(u, *n, chunk_len),
100        DType::Primitive(ptype, n) => match ptype {
101            PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
102            PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
103            PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
104            PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
105            PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
106            PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
107            PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
108            PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
109            PType::F16 => Ok(random_primitive::<u16>(u, *n, chunk_len)?
110                .to_primitive()
111                .reinterpret_cast(PType::F16)
112                .into_array()),
113            PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
114            PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
115        },
116        d @ DType::Decimal(decimal, n) => {
117            let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
118            match_each_decimal_value_type!(
119                DecimalType::smallest_decimal_value_type(decimal),
120                |DVT| {
121                    let mut builder = DecimalBuilder::new::<DVT>(*decimal, *n);
122                    for _i in 0..elem_len {
123                        let random_decimal = random_scalar(u, d)?;
124                        builder.append_scalar(&random_decimal).vortex_expect(
125                            "was somehow unable to append a decimal to a decimal builder",
126                        );
127                    }
128                    Ok(builder.finish())
129                }
130            )
131        }
132        DType::Utf8(n) => random_string(u, *n, chunk_len),
133        DType::Binary(n) => random_bytes(u, *n, chunk_len),
134        DType::Struct(sdt, n) => {
135            let first_array = sdt
136                .fields()
137                .next()
138                .map(|d| random_array(u, &d, chunk_len))
139                .transpose()?;
140            let resolved_len = first_array
141                .as_ref()
142                .map(|a| a.len())
143                .or(chunk_len)
144                .map(Ok)
145                .unwrap_or_else(|| u.int_in_range(0..=100))?;
146            let children = first_array
147                .into_iter()
148                .map(Ok)
149                .chain(
150                    sdt.fields()
151                        .skip(1)
152                        .map(|d| random_array(u, &d, Some(resolved_len))),
153                )
154                .collect::<Result<Vec<_>>>()?;
155            Ok(StructArray::try_new(
156                sdt.names().clone(),
157                children,
158                resolved_len,
159                random_validity(u, *n, resolved_len)?,
160            )
161            .vortex_unwrap()
162            .into_array())
163        }
164        DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
165        DType::FixedSizeList(elem_dtype, list_size, null) => {
166            random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
167        }
168        DType::Extension(..) => {
169            todo!("Extension arrays are not implemented")
170        }
171    }
172}
173
174/// Creates a random fixed-size list array.
175///
176/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
177fn random_fixed_size_list(
178    u: &mut Unstructured,
179    elem_dtype: &Arc<DType>,
180    list_size: u32,
181    null: Nullability,
182    chunk_len: Option<usize>,
183) -> Result<ArrayRef> {
184    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
185
186    let mut builder =
187        FixedSizeListBuilder::with_capacity(elem_dtype.clone(), list_size, null, array_length);
188
189    for _ in 0..array_length {
190        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
191            builder.append_null();
192        } else {
193            builder
194                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
195                .vortex_expect("can append value");
196        }
197    }
198
199    Ok(builder.finish())
200}
201
202/// Creates a random list array.
203///
204/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
205fn random_list(
206    u: &mut Unstructured,
207    elem_dtype: &Arc<DType>,
208    null: Nullability,
209    chunk_len: Option<usize>,
210) -> Result<ArrayRef> {
211    match u.int_in_range(0..=5)? {
212        0 => random_list_with_offset_type::<i16>(u, elem_dtype, null, chunk_len),
213        1 => random_list_with_offset_type::<i32>(u, elem_dtype, null, chunk_len),
214        2 => random_list_with_offset_type::<i64>(u, elem_dtype, null, chunk_len),
215        3 => random_list_with_offset_type::<u16>(u, elem_dtype, null, chunk_len),
216        4 => random_list_with_offset_type::<u32>(u, elem_dtype, null, chunk_len),
217        5 => random_list_with_offset_type::<u64>(u, elem_dtype, null, chunk_len),
218        _ => unreachable!("int_in_range returns a value in the above range"),
219    }
220}
221
222/// Creates a random list array with the given [`IntegerPType`] for the internal offsets child.
223///
224/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
225fn random_list_with_offset_type<O: IntegerPType>(
226    u: &mut Unstructured,
227    elem_dtype: &Arc<DType>,
228    null: Nullability,
229    chunk_len: Option<usize>,
230) -> Result<ArrayRef> {
231    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
232
233    let mut builder = ListViewBuilder::<O, O>::with_capacity(elem_dtype.clone(), null, 20, 10);
234
235    for _ in 0..array_length {
236        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
237            builder.append_null();
238        } else {
239            let list_size = u.int_in_range(0..=20)?;
240            builder
241                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
242                .vortex_expect("can append value");
243        }
244    }
245
246    Ok(builder.finish())
247}
248
249/// Creates a random list scalar with the specified list size.
250fn random_list_scalar(
251    u: &mut Unstructured,
252    elem_dtype: &Arc<DType>,
253    list_size: u32,
254    null: Nullability,
255) -> Result<Scalar> {
256    let elems = (0..list_size)
257        .map(|_| random_scalar(u, elem_dtype))
258        .collect::<Result<Vec<_>>>()?;
259    Ok(Scalar::list(elem_dtype.clone(), elems, null))
260}
261
262fn random_string(
263    u: &mut Unstructured,
264    nullability: Nullability,
265    len: Option<usize>,
266) -> Result<ArrayRef> {
267    match nullability {
268        Nullability::NonNullable => {
269            let v = arbitrary_vec_of_len::<String>(u, len)?;
270            Ok(match u.int_in_range(0..=1)? {
271                0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
272                1 => VarBinViewArray::from_iter_str(v).into_array(),
273                _ => unreachable!(),
274            })
275        }
276        Nullability::Nullable => {
277            let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
278            Ok(match u.int_in_range(0..=1)? {
279                0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
280                1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
281                _ => unreachable!(),
282            })
283        }
284    }
285}
286
287fn random_bytes(
288    u: &mut Unstructured,
289    nullability: Nullability,
290    len: Option<usize>,
291) -> Result<ArrayRef> {
292    match nullability {
293        Nullability::NonNullable => {
294            let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
295            Ok(match u.int_in_range(0..=1)? {
296                0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
297                1 => VarBinViewArray::from_iter_bin(v).into_array(),
298                _ => unreachable!(),
299            })
300        }
301        Nullability::Nullable => {
302            let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
303            Ok(match u.int_in_range(0..=1)? {
304                0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
305                1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
306                _ => unreachable!(),
307            })
308        }
309    }
310}
311
312fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
313    u: &mut Unstructured<'a>,
314    nullability: Nullability,
315    len: Option<usize>,
316) -> Result<ArrayRef> {
317    let v = arbitrary_vec_of_len::<T>(u, len)?;
318    let validity = random_validity(u, nullability, v.len())?;
319    Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
320}
321
322fn random_bool(
323    u: &mut Unstructured,
324    nullability: Nullability,
325    len: Option<usize>,
326) -> Result<ArrayRef> {
327    let v = arbitrary_vec_of_len(u, len)?;
328    let validity = random_validity(u, nullability, v.len())?;
329    Ok(BoolArray::from_bit_buffer(BitBuffer::from(v), validity).into_array())
330}
331
332fn random_validity(u: &mut Unstructured, nullability: Nullability, len: usize) -> Result<Validity> {
333    match nullability {
334        Nullability::NonNullable => Ok(Validity::NonNullable),
335        Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
336            0 => Validity::AllValid,
337            1 => Validity::AllInvalid,
338            2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
339            _ => unreachable!(),
340        }),
341    }
342}
343
344fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
345    u: &mut Unstructured<'a>,
346    len: Option<usize>,
347) -> Result<Vec<T>> {
348    len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
349        .unwrap_or_else(|| Vec::<T>::arbitrary(u))
350}