Skip to main content

vortex_array/arrays/
arbitrary.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::iter;
5use std::ops::RangeInclusive;
6use std::sync::Arc;
7
8use arbitrary::Arbitrary;
9use arbitrary::Error::IncorrectFormat;
10use arbitrary::Result;
11use arbitrary::Unstructured;
12use vortex_buffer::BitBuffer;
13use vortex_buffer::Buffer;
14use vortex_error::VortexExpect;
15
16use crate::ArrayRef;
17use crate::IntoArray;
18#[expect(deprecated)]
19use crate::ToCanonical as _;
20use crate::arrays::BoolArray;
21use crate::arrays::ChunkedArray;
22use crate::arrays::NullArray;
23use crate::arrays::PrimitiveArray;
24use crate::arrays::StructArray;
25use crate::arrays::VarBinArray;
26use crate::arrays::VarBinViewArray;
27use crate::arrays::primitive::PrimitiveArrayExt;
28use crate::builders::ArrayBuilder;
29use crate::builders::DecimalBuilder;
30use crate::builders::FixedSizeListBuilder;
31use crate::builders::ListViewBuilder;
32use crate::dtype::DType;
33use crate::dtype::IntegerPType;
34use crate::dtype::NativePType;
35use crate::dtype::Nullability;
36use crate::dtype::PType;
37use crate::match_each_decimal_value_type;
38use crate::scalar::Scalar;
39use crate::scalar::arbitrary::random_scalar;
40use crate::validity::Validity;
41
42/// A wrapper type to implement `Arbitrary` for `ArrayRef`.
43#[derive(Clone, Debug)]
44pub struct ArbitraryArray(pub ArrayRef);
45
46/// Trait for generating arbitrary values with a caller-provided configuration.
47pub trait ArbitraryWith<'a, C>: Sized {
48    /// Generate an arbitrary value using the provided configuration.
49    fn arbitrary_with_config(u: &mut Unstructured<'a>, config: &C) -> Result<Self>;
50}
51
52/// Configuration for arbitrary array generation.
53#[derive(Clone, Debug)]
54pub struct ArbitraryArrayConfig {
55    /// Fixed dtype, or `None` to generate one from [`Unstructured`].
56    pub dtype: Option<DType>,
57    /// Inclusive range for the total array length.
58    pub len: RangeInclusive<usize>,
59}
60
61impl<'a> ArbitraryWith<'a, ArbitraryArrayConfig> for ArbitraryArray {
62    fn arbitrary_with_config(
63        u: &mut Unstructured<'a>,
64        config: &ArbitraryArrayConfig,
65    ) -> Result<Self> {
66        if config.len.is_empty() {
67            return Err(IncorrectFormat);
68        }
69
70        let dtype = match &config.dtype {
71            Some(dtype) => dtype.clone(),
72            None => u.arbitrary()?,
73        };
74        let len = u.int_in_range(config.len.clone())?;
75
76        random_array(u, &dtype, Some(len)).map(ArbitraryArray)
77    }
78}
79
/// Splits `n` into `parts` sizes that sum to `n` and differ by at most one.
///
/// The smaller sizes come first: the leading `parts - n % parts` entries are `n / parts` and
/// the trailing `n % parts` entries are one larger.
///
/// # Panics
///
/// Panics if `parts` is zero.
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    let remainder = n % parts;
    let base = n / parts;

    let mut sizes = Vec::with_capacity(parts);
    sizes.extend(iter::repeat_n(base, parts - remainder));
    sizes.extend(iter::repeat_n(base + 1, remainder));
    sizes
}
87
88/// Creates a random array with a random number of chunks.
89fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
90    let num_chunks = u.int_in_range(1..=3)?;
91    let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
92    let mut chunks = (0..num_chunks)
93        .map(|i| {
94            let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
95            random_array_chunk(u, dtype, chunk_len)
96        })
97        .collect::<Result<Vec<_>>>()?;
98
99    if chunks.len() == 1 {
100        Ok(chunks.remove(0))
101    } else {
102        let dtype = chunks[0].dtype().clone();
103        Ok(ChunkedArray::try_new(chunks, dtype)
104            .vortex_expect("operation should succeed in arbitrary impl")
105            .into_array())
106    }
107}
108
109/// Creates a random array chunk.
110fn random_array_chunk(
111    u: &mut Unstructured<'_>,
112    dtype: &DType,
113    chunk_len: Option<usize>,
114) -> Result<ArrayRef> {
115    match dtype {
116        DType::Null => Ok(NullArray::new(
117            chunk_len
118                .map(Ok)
119                .unwrap_or_else(|| u.int_in_range(0..=100))?,
120        )
121        .into_array()),
122        DType::Bool(n) => random_bool(u, *n, chunk_len),
123        DType::Primitive(ptype, n) => match ptype {
124            PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
125            PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
126            PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
127            PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
128            PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
129            PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
130            PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
131            PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
132            PType::F16 => {
133                #[expect(deprecated)]
134                let prim = random_primitive::<u16>(u, *n, chunk_len)?
135                    .to_primitive()
136                    .reinterpret_cast(PType::F16)
137                    .into_array();
138                Ok(prim)
139            }
140            PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
141            PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
142        },
143        d @ DType::Decimal(decimal, n) => {
144            let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
145            match_each_decimal_value_type!(DecimalType::smallest_decimal_value_type(decimal), |D| {
146                let mut builder = DecimalBuilder::new::<D>(*decimal, *n);
147                for _i in 0..elem_len {
148                    let random_decimal = random_scalar(u, d)?;
149                    builder.append_scalar(&random_decimal).vortex_expect(
150                        "was somehow unable to append a decimal to a decimal builder",
151                    );
152                }
153                Ok(builder.finish())
154            })
155        }
156        DType::Utf8(n) => random_string(u, *n, chunk_len),
157        DType::Binary(n) => random_bytes(u, *n, chunk_len),
158        DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
159        DType::FixedSizeList(elem_dtype, list_size, null) => {
160            random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
161        }
162        DType::Struct(sdt, n) => {
163            let first_array = sdt
164                .fields()
165                .next()
166                .map(|d| random_array(u, &d, chunk_len))
167                .transpose()?;
168            let resolved_len = first_array
169                .as_ref()
170                .map(|a| a.len())
171                .or(chunk_len)
172                .map(Ok)
173                .unwrap_or_else(|| u.int_in_range(0..=100))?;
174            let children = first_array
175                .into_iter()
176                .map(Ok)
177                .chain(
178                    sdt.fields()
179                        .skip(1)
180                        .map(|d| random_array(u, &d, Some(resolved_len))),
181                )
182                .collect::<Result<Vec<_>>>()?;
183            Ok(StructArray::try_new(
184                sdt.names().clone(),
185                children,
186                resolved_len,
187                random_validity(u, *n, resolved_len)?,
188            )
189            .vortex_expect("operation should succeed in arbitrary impl")
190            .into_array())
191        }
192        DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
193        DType::Variant(_) => {
194            unimplemented!("Variant arrays are not implemented")
195        }
196        DType::Extension(..) => {
197            unimplemented!("Extension arrays are not implemented")
198        }
199    }
200}
201
202/// Creates a random fixed-size list array.
203///
204/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
205fn random_fixed_size_list(
206    u: &mut Unstructured,
207    elem_dtype: &Arc<DType>,
208    list_size: u32,
209    null: Nullability,
210    chunk_len: Option<usize>,
211) -> Result<ArrayRef> {
212    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
213
214    let mut builder =
215        FixedSizeListBuilder::with_capacity(Arc::clone(elem_dtype), list_size, null, array_length);
216
217    for _ in 0..array_length {
218        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
219            builder.append_null();
220        } else {
221            builder
222                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
223                .vortex_expect("can append value");
224        }
225    }
226
227    Ok(builder.finish())
228}
229
230/// Creates a random list array.
231///
232/// If the `chunk_len` is specified, the length of the array will be equal to the chunk length.
233fn random_list(
234    u: &mut Unstructured,
235    elem_dtype: &Arc<DType>,
236    null: Nullability,
237    chunk_len: Option<usize>,
238) -> Result<ArrayRef> {
239    let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
240    // Worst-case total elements: each list can have up to 20 elements.
241    let max_total_elements = array_length as u64 * 20;
242
243    match u.int_in_range(0..=5)? {
244        0 if i16::max_value_as_u64() >= max_total_elements => {
245            random_list_with_offset_type::<i16>(u, elem_dtype, null, array_length)
246        }
247        1 if i32::max_value_as_u64() >= max_total_elements => {
248            random_list_with_offset_type::<i32>(u, elem_dtype, null, array_length)
249        }
250        3 if u16::max_value_as_u64() >= max_total_elements => {
251            random_list_with_offset_type::<u16>(u, elem_dtype, null, array_length)
252        }
253        4 if u32::max_value_as_u64() >= max_total_elements => {
254            random_list_with_offset_type::<u32>(u, elem_dtype, null, array_length)
255        }
256        // i64 and u64 always fit; also the fallback for when narrower types don't.
257        _ => {
258            if u.arbitrary::<bool>()? {
259                random_list_with_offset_type::<i64>(u, elem_dtype, null, array_length)
260            } else {
261                random_list_with_offset_type::<u64>(u, elem_dtype, null, array_length)
262            }
263        }
264    }
265}
266
267/// Creates a random list array with the given [`IntegerPType`] for the internal offsets child.
268fn random_list_with_offset_type<O: IntegerPType>(
269    u: &mut Unstructured,
270    elem_dtype: &Arc<DType>,
271    null: Nullability,
272    array_length: usize,
273) -> Result<ArrayRef> {
274    let mut builder =
275        ListViewBuilder::<O, O>::with_capacity(Arc::clone(elem_dtype), null, array_length, 10);
276
277    for _ in 0..array_length {
278        if null == Nullability::Nullable && u.arbitrary::<bool>()? {
279            builder.append_null();
280        } else {
281            let list_size = u.int_in_range(0..=20)?;
282            builder
283                .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
284                .vortex_expect("can append value");
285        }
286    }
287
288    Ok(builder.finish())
289}
290
291/// Creates a random list scalar with the specified list size.
292fn random_list_scalar(
293    u: &mut Unstructured,
294    elem_dtype: &Arc<DType>,
295    list_size: u32,
296    null: Nullability,
297) -> Result<Scalar> {
298    let elems = (0..list_size)
299        .map(|_| random_scalar(u, elem_dtype))
300        .collect::<Result<Vec<_>>>()?;
301    Ok(Scalar::list(Arc::clone(elem_dtype), elems, null))
302}
303
304fn random_string(
305    u: &mut Unstructured,
306    nullability: Nullability,
307    len: Option<usize>,
308) -> Result<ArrayRef> {
309    match nullability {
310        Nullability::NonNullable => {
311            let v = arbitrary_vec_of_len::<String>(u, len)?;
312            Ok(match u.int_in_range(0..=1)? {
313                0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
314                1 => VarBinViewArray::from_iter_str(v).into_array(),
315                _ => unreachable!(),
316            })
317        }
318        Nullability::Nullable => {
319            let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
320            Ok(match u.int_in_range(0..=1)? {
321                0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
322                1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
323                _ => unreachable!(),
324            })
325        }
326    }
327}
328
329fn random_bytes(
330    u: &mut Unstructured,
331    nullability: Nullability,
332    len: Option<usize>,
333) -> Result<ArrayRef> {
334    match nullability {
335        Nullability::NonNullable => {
336            let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
337            Ok(match u.int_in_range(0..=1)? {
338                0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
339                1 => VarBinViewArray::from_iter_bin(v).into_array(),
340                _ => unreachable!(),
341            })
342        }
343        Nullability::Nullable => {
344            let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
345            Ok(match u.int_in_range(0..=1)? {
346                0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
347                1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
348                _ => unreachable!(),
349            })
350        }
351    }
352}
353
354fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
355    u: &mut Unstructured<'a>,
356    nullability: Nullability,
357    len: Option<usize>,
358) -> Result<ArrayRef> {
359    let v = arbitrary_vec_of_len::<T>(u, len)?;
360    let validity = random_validity(u, nullability, v.len())?;
361    Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
362}
363
364fn random_bool(
365    u: &mut Unstructured,
366    nullability: Nullability,
367    len: Option<usize>,
368) -> Result<ArrayRef> {
369    let v = arbitrary_vec_of_len(u, len)?;
370    let validity = random_validity(u, nullability, v.len())?;
371    Ok(BoolArray::new(BitBuffer::from(v), validity).into_array())
372}
373
374pub fn random_validity(
375    u: &mut Unstructured,
376    nullability: Nullability,
377    len: usize,
378) -> Result<Validity> {
379    match nullability {
380        Nullability::NonNullable => Ok(Validity::NonNullable),
381        Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
382            0 => Validity::AllValid,
383            1 => Validity::AllInvalid,
384            2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
385            _ => unreachable!(),
386        }),
387    }
388}
389
390fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
391    u: &mut Unstructured<'a>,
392    len: Option<usize>,
393) -> Result<Vec<T>> {
394    len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
395        .unwrap_or_else(|| Vec::<T>::arbitrary(u))
396}