vortex_array/arrays/
arbitrary.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::iter;
5use std::sync::Arc;
6
7use arbitrary::Arbitrary;
8use arbitrary::Result;
9use arbitrary::Unstructured;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::Buffer;
12use vortex_dtype::DType;
13use vortex_dtype::IntegerPType;
14use vortex_dtype::NativePType;
15use vortex_dtype::Nullability;
16use vortex_dtype::PType;
17use vortex_dtype::match_each_decimal_value_type;
18use vortex_error::VortexExpect;
19use vortex_scalar::Scalar;
20use vortex_scalar::arbitrary::random_scalar;
21
22use super::BoolArray;
23use super::ChunkedArray;
24use super::NullArray;
25use super::PrimitiveArray;
26use super::StructArray;
27use crate::Array;
28use crate::ArrayRef;
29use crate::IntoArray;
30use crate::ToCanonical;
31use crate::arrays::VarBinArray;
32use crate::arrays::VarBinViewArray;
33use crate::builders::ArrayBuilder;
34use crate::builders::DecimalBuilder;
35use crate::builders::FixedSizeListBuilder;
36use crate::builders::ListViewBuilder;
37use crate::validity::Validity;
38
39/// A wrapper type to implement `Arbitrary` for `ArrayRef`.
40#[derive(Clone, Debug)]
41pub struct ArbitraryArray(pub ArrayRef);
42
43impl<'a> Arbitrary<'a> for ArbitraryArray {
44    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
45        let dtype = u.arbitrary()?;
46        Self::arbitrary_with(u, None, &dtype)
47    }
48}
49
50impl ArbitraryArray {
51    pub fn arbitrary_with(u: &mut Unstructured, len: Option<usize>, dtype: &DType) -> Result<Self> {
52        random_array(u, dtype, len).map(ArbitraryArray)
53    }
54}
55
/// Splits `n` into `parts` sizes that sum to `n` and differ from each other by at most one.
///
/// The smaller sizes come first; the last `n % parts` entries are one larger than the rest.
///
/// # Panics
///
/// Panics if `parts` is zero (remainder with a divisor of zero).
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    // The remainder is computed first so a zero `parts` fails on this operation.
    let extra = n % parts;
    let base = n / parts;

    // Start from an even split, then hand one extra unit to each of the trailing `extra` slots.
    let mut sizes = vec![base; parts];
    for size in sizes.iter_mut().skip(parts - extra) {
        *size += 1;
    }
    sizes
}
63
64/// Creates a random array with a random number of chunks.
65fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
66    let num_chunks = u.int_in_range(1..=3)?;
67    let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
68    let mut chunks = (0..num_chunks)
69        .map(|i| {
70            let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
71            random_array_chunk(u, dtype, chunk_len)
72        })
73        .collect::<Result<Vec<_>>>()?;
74
75    if chunks.len() == 1 {
76        Ok(chunks.remove(0))
77    } else {
78        let dtype = chunks[0].dtype().clone();
79        Ok(ChunkedArray::try_new(chunks, dtype)
80            .vortex_expect("operation should succeed in arbitrary impl")
81            .into_array())
82    }
83}
84
85/// Creates a random array chunk.
86fn random_array_chunk(
87    u: &mut Unstructured<'_>,
88    dtype: &DType,
89    chunk_len: Option<usize>,
90) -> Result<ArrayRef> {
91    match dtype {
92        DType::Null => Ok(NullArray::new(
93            chunk_len
94                .map(Ok)
95                .unwrap_or_else(|| u.int_in_range(0..=100))?,
96        )
97        .into_array()),
98        DType::Bool(n) => random_bool(u, *n, chunk_len),
99        DType::Primitive(ptype, n) => match ptype {
100            PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
101            PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
102            PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
103            PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
104            PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
105            PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
106            PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
107            PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
108            PType::F16 => Ok(random_primitive::<u16>(u, *n, chunk_len)?
109                .to_primitive()
110                .reinterpret_cast(PType::F16)
111                .into_array()),
112            PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
113            PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
114        },
115        d @ DType::Decimal(decimal, n) => {
116            let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
117            match_each_decimal_value_type!(
118                DecimalType::smallest_decimal_value_type(decimal),
119                |DVT| {
120                    let mut builder = DecimalBuilder::new::<DVT>(*decimal, *n);
121                    for _i in 0..elem_len {
122                        let random_decimal = random_scalar(u, d)?;
123                        builder.append_scalar(&random_decimal).vortex_expect(
124                            "was somehow unable to append a decimal to a decimal builder",
125                        );
126                    }
127                    Ok(builder.finish())
128                }
129            )
130        }
131        DType::Utf8(n) => random_string(u, *n, chunk_len),
132        DType::Binary(n) => random_bytes(u, *n, chunk_len),
133        DType::Struct(sdt, n) => {
134            let first_array = sdt
135                .fields()
136                .next()
137                .map(|d| random_array(u, &d, chunk_len))
138                .transpose()?;
139            let resolved_len = first_array
140                .as_ref()
141                .map(|a| a.len())
142                .or(chunk_len)
143                .map(Ok)
144                .unwrap_or_else(|| u.int_in_range(0..=100))?;
145            let children = first_array
146                .into_iter()
147                .map(Ok)
148                .chain(
149                    sdt.fields()
150                        .skip(1)
151                        .map(|d| random_array(u, &d, Some(resolved_len))),
152                )
153                .collect::<Result<Vec<_>>>()?;
154            Ok(StructArray::try_new(
155                sdt.names().clone(),
156                children,
157                resolved_len,
158                random_validity(u, *n, resolved_len)?,
159            )
160            .vortex_expect("operation should succeed in arbitrary impl")
161            .into_array())
162        }
163        DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
164        DType::FixedSizeList(elem_dtype, list_size, null) => {
165            random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
166        }
167        DType::Extension(..) => {
168            todo!("Extension arrays are not implemented")
169        }
170    }
171}
172
173/// Creates a random fixed-size list array.
174///
175/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
176fn random_fixed_size_list(
177    u: &mut Unstructured,
178    elem_dtype: &Arc<DType>,
179    list_size: u32,
180    null: Nullability,
181    chunk_len: Option<usize>,
182) -> Result<ArrayRef> {
183    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
184
185    let mut builder =
186        FixedSizeListBuilder::with_capacity(elem_dtype.clone(), list_size, null, array_length);
187
188    for _ in 0..array_length {
189        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
190            builder.append_null();
191        } else {
192            builder
193                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
194                .vortex_expect("can append value");
195        }
196    }
197
198    Ok(builder.finish())
199}
200
201/// Creates a random list array.
202///
203/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
204fn random_list(
205    u: &mut Unstructured,
206    elem_dtype: &Arc<DType>,
207    null: Nullability,
208    chunk_len: Option<usize>,
209) -> Result<ArrayRef> {
210    match u.int_in_range(0..=5)? {
211        0 => random_list_with_offset_type::<i16>(u, elem_dtype, null, chunk_len),
212        1 => random_list_with_offset_type::<i32>(u, elem_dtype, null, chunk_len),
213        2 => random_list_with_offset_type::<i64>(u, elem_dtype, null, chunk_len),
214        3 => random_list_with_offset_type::<u16>(u, elem_dtype, null, chunk_len),
215        4 => random_list_with_offset_type::<u32>(u, elem_dtype, null, chunk_len),
216        5 => random_list_with_offset_type::<u64>(u, elem_dtype, null, chunk_len),
217        _ => unreachable!("int_in_range returns a value in the above range"),
218    }
219}
220
221/// Creates a random list array with the given [`IntegerPType`] for the internal offsets child.
222///
223/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
224fn random_list_with_offset_type<O: IntegerPType>(
225    u: &mut Unstructured,
226    elem_dtype: &Arc<DType>,
227    null: Nullability,
228    chunk_len: Option<usize>,
229) -> Result<ArrayRef> {
230    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
231
232    let mut builder = ListViewBuilder::<O, O>::with_capacity(elem_dtype.clone(), null, 20, 10);
233
234    for _ in 0..array_length {
235        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
236            builder.append_null();
237        } else {
238            let list_size = u.int_in_range(0..=20)?;
239            builder
240                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
241                .vortex_expect("can append value");
242        }
243    }
244
245    Ok(builder.finish())
246}
247
248/// Creates a random list scalar with the specified list size.
249fn random_list_scalar(
250    u: &mut Unstructured,
251    elem_dtype: &Arc<DType>,
252    list_size: u32,
253    null: Nullability,
254) -> Result<Scalar> {
255    let elems = (0..list_size)
256        .map(|_| random_scalar(u, elem_dtype))
257        .collect::<Result<Vec<_>>>()?;
258    Ok(Scalar::list(elem_dtype.clone(), elems, null))
259}
260
261fn random_string(
262    u: &mut Unstructured,
263    nullability: Nullability,
264    len: Option<usize>,
265) -> Result<ArrayRef> {
266    match nullability {
267        Nullability::NonNullable => {
268            let v = arbitrary_vec_of_len::<String>(u, len)?;
269            Ok(match u.int_in_range(0..=1)? {
270                0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
271                1 => VarBinViewArray::from_iter_str(v).into_array(),
272                _ => unreachable!(),
273            })
274        }
275        Nullability::Nullable => {
276            let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
277            Ok(match u.int_in_range(0..=1)? {
278                0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
279                1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
280                _ => unreachable!(),
281            })
282        }
283    }
284}
285
286fn random_bytes(
287    u: &mut Unstructured,
288    nullability: Nullability,
289    len: Option<usize>,
290) -> Result<ArrayRef> {
291    match nullability {
292        Nullability::NonNullable => {
293            let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
294            Ok(match u.int_in_range(0..=1)? {
295                0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
296                1 => VarBinViewArray::from_iter_bin(v).into_array(),
297                _ => unreachable!(),
298            })
299        }
300        Nullability::Nullable => {
301            let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
302            Ok(match u.int_in_range(0..=1)? {
303                0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
304                1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
305                _ => unreachable!(),
306            })
307        }
308    }
309}
310
311fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
312    u: &mut Unstructured<'a>,
313    nullability: Nullability,
314    len: Option<usize>,
315) -> Result<ArrayRef> {
316    let v = arbitrary_vec_of_len::<T>(u, len)?;
317    let validity = random_validity(u, nullability, v.len())?;
318    Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
319}
320
321fn random_bool(
322    u: &mut Unstructured,
323    nullability: Nullability,
324    len: Option<usize>,
325) -> Result<ArrayRef> {
326    let v = arbitrary_vec_of_len(u, len)?;
327    let validity = random_validity(u, nullability, v.len())?;
328    Ok(BoolArray::from_bit_buffer(BitBuffer::from(v), validity).into_array())
329}
330
331fn random_validity(u: &mut Unstructured, nullability: Nullability, len: usize) -> Result<Validity> {
332    match nullability {
333        Nullability::NonNullable => Ok(Validity::NonNullable),
334        Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
335            0 => Validity::AllValid,
336            1 => Validity::AllInvalid,
337            2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
338            _ => unreachable!(),
339        }),
340    }
341}
342
343fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
344    u: &mut Unstructured<'a>,
345    len: Option<usize>,
346) -> Result<Vec<T>> {
347    len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
348        .unwrap_or_else(|| Vec::<T>::arbitrary(u))
349}