Skip to main content

vortex_array/arrays/
arbitrary.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::iter;
5use std::sync::Arc;
6
7use arbitrary::Arbitrary;
8use arbitrary::Result;
9use arbitrary::Unstructured;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::Buffer;
12use vortex_error::VortexExpect;
13
14use super::BoolArray;
15use super::ChunkedArray;
16use super::NullArray;
17use super::PrimitiveArray;
18use super::StructArray;
19use crate::Array;
20use crate::ArrayRef;
21use crate::IntoArray;
22use crate::ToCanonical;
23use crate::arrays::VarBinArray;
24use crate::arrays::VarBinViewArray;
25use crate::builders::ArrayBuilder;
26use crate::builders::DecimalBuilder;
27use crate::builders::FixedSizeListBuilder;
28use crate::builders::ListViewBuilder;
29use crate::dtype::DType;
30use crate::dtype::IntegerPType;
31use crate::dtype::NativePType;
32use crate::dtype::Nullability;
33use crate::dtype::PType;
34use crate::match_each_decimal_value_type;
35use crate::scalar::Scalar;
36use crate::scalar::arbitrary::random_scalar;
37use crate::validity::Validity;
38
39/// A wrapper type to implement `Arbitrary` for `ArrayRef`.
40#[derive(Clone, Debug)]
41pub struct ArbitraryArray(pub ArrayRef);
42
43impl<'a> Arbitrary<'a> for ArbitraryArray {
44    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
45        let dtype = u.arbitrary()?;
46        Self::arbitrary_with(u, None, &dtype)
47    }
48}
49
50impl ArbitraryArray {
51    pub fn arbitrary_with(u: &mut Unstructured, len: Option<usize>, dtype: &DType) -> Result<Self> {
52        random_array(u, dtype, len).map(ArbitraryArray)
53    }
54}
55
/// Splits `n` into `parts` non-negative summands that add up to `n`.
///
/// The first `parts - n % parts` entries are `n / parts`; the remaining `n % parts` entries are
/// one larger.
///
/// # Panics
///
/// Panics if `parts` is zero.
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    let remainder = n % parts;
    let base = n / parts;
    // Entries at or past this index absorb one unit of the remainder each.
    let first_larger = parts - remainder;
    (0..parts)
        .map(|idx| if idx < first_larger { base } else { base + 1 })
        .collect()
}
63
64/// Creates a random array with a random number of chunks.
65fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
66    let num_chunks = u.int_in_range(1..=3)?;
67    let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
68    let mut chunks = (0..num_chunks)
69        .map(|i| {
70            let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
71            random_array_chunk(u, dtype, chunk_len)
72        })
73        .collect::<Result<Vec<_>>>()?;
74
75    if chunks.len() == 1 {
76        Ok(chunks.remove(0))
77    } else {
78        let dtype = chunks[0].dtype().clone();
79        Ok(ChunkedArray::try_new(chunks, dtype)
80            .vortex_expect("operation should succeed in arbitrary impl")
81            .into_array())
82    }
83}
84
85/// Creates a random array chunk.
86fn random_array_chunk(
87    u: &mut Unstructured<'_>,
88    dtype: &DType,
89    chunk_len: Option<usize>,
90) -> Result<ArrayRef> {
91    match dtype {
92        DType::Null => Ok(NullArray::new(
93            chunk_len
94                .map(Ok)
95                .unwrap_or_else(|| u.int_in_range(0..=100))?,
96        )
97        .into_array()),
98        DType::Bool(n) => random_bool(u, *n, chunk_len),
99        DType::Primitive(ptype, n) => match ptype {
100            PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
101            PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
102            PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
103            PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
104            PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
105            PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
106            PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
107            PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
108            PType::F16 => Ok(random_primitive::<u16>(u, *n, chunk_len)?
109                .to_primitive()
110                .reinterpret_cast(PType::F16)
111                .into_array()),
112            PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
113            PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
114        },
115        d @ DType::Decimal(decimal, n) => {
116            let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
117            match_each_decimal_value_type!(DecimalType::smallest_decimal_value_type(decimal), |D| {
118                let mut builder = DecimalBuilder::new::<D>(*decimal, *n);
119                for _i in 0..elem_len {
120                    let random_decimal = random_scalar(u, d)?;
121                    builder.append_scalar(&random_decimal).vortex_expect(
122                        "was somehow unable to append a decimal to a decimal builder",
123                    );
124                }
125                Ok(builder.finish())
126            })
127        }
128        DType::Utf8(n) => random_string(u, *n, chunk_len),
129        DType::Binary(n) => random_bytes(u, *n, chunk_len),
130        DType::Struct(sdt, n) => {
131            let first_array = sdt
132                .fields()
133                .next()
134                .map(|d| random_array(u, &d, chunk_len))
135                .transpose()?;
136            let resolved_len = first_array
137                .as_ref()
138                .map(|a| a.len())
139                .or(chunk_len)
140                .map(Ok)
141                .unwrap_or_else(|| u.int_in_range(0..=100))?;
142            let children = first_array
143                .into_iter()
144                .map(Ok)
145                .chain(
146                    sdt.fields()
147                        .skip(1)
148                        .map(|d| random_array(u, &d, Some(resolved_len))),
149                )
150                .collect::<Result<Vec<_>>>()?;
151            Ok(StructArray::try_new(
152                sdt.names().clone(),
153                children,
154                resolved_len,
155                random_validity(u, *n, resolved_len)?,
156            )
157            .vortex_expect("operation should succeed in arbitrary impl")
158            .into_array())
159        }
160        DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
161        DType::FixedSizeList(elem_dtype, list_size, null) => {
162            random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
163        }
164        DType::Extension(..) => {
165            todo!("Extension arrays are not implemented")
166        }
167    }
168}
169
170/// Creates a random fixed-size list array.
171///
172/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
173fn random_fixed_size_list(
174    u: &mut Unstructured,
175    elem_dtype: &Arc<DType>,
176    list_size: u32,
177    null: Nullability,
178    chunk_len: Option<usize>,
179) -> Result<ArrayRef> {
180    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
181
182    let mut builder =
183        FixedSizeListBuilder::with_capacity(elem_dtype.clone(), list_size, null, array_length);
184
185    for _ in 0..array_length {
186        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
187            builder.append_null();
188        } else {
189            builder
190                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
191                .vortex_expect("can append value");
192        }
193    }
194
195    Ok(builder.finish())
196}
197
198/// Creates a random list array.
199///
200/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
201fn random_list(
202    u: &mut Unstructured,
203    elem_dtype: &Arc<DType>,
204    null: Nullability,
205    chunk_len: Option<usize>,
206) -> Result<ArrayRef> {
207    match u.int_in_range(0..=5)? {
208        0 => random_list_with_offset_type::<i16>(u, elem_dtype, null, chunk_len),
209        1 => random_list_with_offset_type::<i32>(u, elem_dtype, null, chunk_len),
210        2 => random_list_with_offset_type::<i64>(u, elem_dtype, null, chunk_len),
211        3 => random_list_with_offset_type::<u16>(u, elem_dtype, null, chunk_len),
212        4 => random_list_with_offset_type::<u32>(u, elem_dtype, null, chunk_len),
213        5 => random_list_with_offset_type::<u64>(u, elem_dtype, null, chunk_len),
214        _ => unreachable!("int_in_range returns a value in the above range"),
215    }
216}
217
218/// Creates a random list array with the given [`IntegerPType`] for the internal offsets child.
219///
220/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
221fn random_list_with_offset_type<O: IntegerPType>(
222    u: &mut Unstructured,
223    elem_dtype: &Arc<DType>,
224    null: Nullability,
225    chunk_len: Option<usize>,
226) -> Result<ArrayRef> {
227    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
228
229    let mut builder = ListViewBuilder::<O, O>::with_capacity(elem_dtype.clone(), null, 20, 10);
230
231    for _ in 0..array_length {
232        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
233            builder.append_null();
234        } else {
235            let list_size = u.int_in_range(0..=20)?;
236            builder
237                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
238                .vortex_expect("can append value");
239        }
240    }
241
242    Ok(builder.finish())
243}
244
245/// Creates a random list scalar with the specified list size.
246fn random_list_scalar(
247    u: &mut Unstructured,
248    elem_dtype: &Arc<DType>,
249    list_size: u32,
250    null: Nullability,
251) -> Result<Scalar> {
252    let elems = (0..list_size)
253        .map(|_| random_scalar(u, elem_dtype))
254        .collect::<Result<Vec<_>>>()?;
255    Ok(Scalar::list(elem_dtype.clone(), elems, null))
256}
257
258fn random_string(
259    u: &mut Unstructured,
260    nullability: Nullability,
261    len: Option<usize>,
262) -> Result<ArrayRef> {
263    match nullability {
264        Nullability::NonNullable => {
265            let v = arbitrary_vec_of_len::<String>(u, len)?;
266            Ok(match u.int_in_range(0..=1)? {
267                0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
268                1 => VarBinViewArray::from_iter_str(v).into_array(),
269                _ => unreachable!(),
270            })
271        }
272        Nullability::Nullable => {
273            let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
274            Ok(match u.int_in_range(0..=1)? {
275                0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
276                1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
277                _ => unreachable!(),
278            })
279        }
280    }
281}
282
283fn random_bytes(
284    u: &mut Unstructured,
285    nullability: Nullability,
286    len: Option<usize>,
287) -> Result<ArrayRef> {
288    match nullability {
289        Nullability::NonNullable => {
290            let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
291            Ok(match u.int_in_range(0..=1)? {
292                0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
293                1 => VarBinViewArray::from_iter_bin(v).into_array(),
294                _ => unreachable!(),
295            })
296        }
297        Nullability::Nullable => {
298            let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
299            Ok(match u.int_in_range(0..=1)? {
300                0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
301                1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
302                _ => unreachable!(),
303            })
304        }
305    }
306}
307
308fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
309    u: &mut Unstructured<'a>,
310    nullability: Nullability,
311    len: Option<usize>,
312) -> Result<ArrayRef> {
313    let v = arbitrary_vec_of_len::<T>(u, len)?;
314    let validity = random_validity(u, nullability, v.len())?;
315    Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
316}
317
318fn random_bool(
319    u: &mut Unstructured,
320    nullability: Nullability,
321    len: Option<usize>,
322) -> Result<ArrayRef> {
323    let v = arbitrary_vec_of_len(u, len)?;
324    let validity = random_validity(u, nullability, v.len())?;
325    Ok(BoolArray::new(BitBuffer::from(v), validity).into_array())
326}
327
328pub fn random_validity(
329    u: &mut Unstructured,
330    nullability: Nullability,
331    len: usize,
332) -> Result<Validity> {
333    match nullability {
334        Nullability::NonNullable => Ok(Validity::NonNullable),
335        Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
336            0 => Validity::AllValid,
337            1 => Validity::AllInvalid,
338            2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
339            _ => unreachable!(),
340        }),
341    }
342}
343
344fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
345    u: &mut Unstructured<'a>,
346    len: Option<usize>,
347) -> Result<Vec<T>> {
348    len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
349        .unwrap_or_else(|| Vec::<T>::arbitrary(u))
350}