1use std::iter;
5use std::sync::Arc;
6
7use arbitrary::{Arbitrary, Result, Unstructured};
8use vortex_buffer::{BitBuffer, Buffer};
9use vortex_dtype::{
10 DType, IntegerPType, NativePType, Nullability, PType, match_each_decimal_value_type,
11};
12use vortex_error::{VortexExpect, VortexUnwrap};
13use vortex_scalar::Scalar;
14use vortex_scalar::arbitrary::random_scalar;
15
16use super::{BoolArray, ChunkedArray, NullArray, PrimitiveArray, StructArray};
17use crate::arrays::{VarBinArray, VarBinViewArray};
18use crate::builders::{ArrayBuilder, DecimalBuilder, FixedSizeListBuilder, ListViewBuilder};
19use crate::validity::Validity;
20use crate::{Array, ArrayRef, IntoArray, ToCanonical};
21
22#[derive(Clone, Debug)]
24pub struct ArbitraryArray(pub ArrayRef);
25
26impl<'a> Arbitrary<'a> for ArbitraryArray {
27 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
28 let dtype = u.arbitrary()?;
29 Self::arbitrary_with(u, None, &dtype)
30 }
31}
32
33impl ArbitraryArray {
34 pub fn arbitrary_with(u: &mut Unstructured, len: Option<usize>, dtype: &DType) -> Result<Self> {
35 random_array(u, dtype, len).map(ArbitraryArray)
36 }
37}
38
/// Splits `n` into `parts` near-equal sizes that sum to `n`.
///
/// Every entry is `n / parts`, except the last `n % parts` entries, which are one larger.
///
/// # Panics
///
/// Panics if `parts` is zero.
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    let base = n / parts;
    let extra = n % parts;

    // Start with an even split, then hand the leftover units to the trailing entries.
    let mut sizes: Vec<usize> = iter::repeat_n(base, parts).collect();
    for size in sizes.iter_mut().rev().take(extra) {
        *size += 1;
    }
    sizes
}
46
47fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
49 let num_chunks = u.int_in_range(1..=3)?;
50 let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
51 let mut chunks = (0..num_chunks)
52 .map(|i| {
53 let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
54 random_array_chunk(u, dtype, chunk_len)
55 })
56 .collect::<Result<Vec<_>>>()?;
57
58 if chunks.len() == 1 {
59 Ok(chunks.remove(0))
60 } else {
61 let dtype = chunks[0].dtype().clone();
62 Ok(ChunkedArray::try_new(chunks, dtype)
63 .vortex_unwrap()
64 .into_array())
65 }
66}
67
68fn random_array_chunk(
70 u: &mut Unstructured<'_>,
71 dtype: &DType,
72 chunk_len: Option<usize>,
73) -> Result<ArrayRef> {
74 match dtype {
75 DType::Null => Ok(NullArray::new(
76 chunk_len
77 .map(Ok)
78 .unwrap_or_else(|| u.int_in_range(0..=100))?,
79 )
80 .into_array()),
81 DType::Bool(n) => random_bool(u, *n, chunk_len),
82 DType::Primitive(ptype, n) => match ptype {
83 PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
84 PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
85 PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
86 PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
87 PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
88 PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
89 PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
90 PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
91 PType::F16 => Ok(random_primitive::<u16>(u, *n, chunk_len)?
92 .to_primitive()
93 .reinterpret_cast(PType::F16)
94 .into_array()),
95 PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
96 PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
97 },
98 d @ DType::Decimal(decimal, n) => {
99 let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
100 match_each_decimal_value_type!(
101 DecimalType::smallest_decimal_value_type(decimal),
102 |DVT| {
103 let mut builder = DecimalBuilder::new::<DVT>(*decimal, *n);
104 for _i in 0..elem_len {
105 let random_decimal = random_scalar(u, d)?;
106 builder.append_scalar(&random_decimal).vortex_expect(
107 "was somehow unable to append a decimal to a decimal builder",
108 );
109 }
110 Ok(builder.finish())
111 }
112 )
113 }
114 DType::Utf8(n) => random_string(u, *n, chunk_len),
115 DType::Binary(n) => random_bytes(u, *n, chunk_len),
116 DType::Struct(sdt, n) => {
117 let first_array = sdt
118 .fields()
119 .next()
120 .map(|d| random_array(u, &d, chunk_len))
121 .transpose()?;
122 let resolved_len = first_array
123 .as_ref()
124 .map(|a| a.len())
125 .or(chunk_len)
126 .map(Ok)
127 .unwrap_or_else(|| u.int_in_range(0..=100))?;
128 let children = first_array
129 .into_iter()
130 .map(Ok)
131 .chain(
132 sdt.fields()
133 .skip(1)
134 .map(|d| random_array(u, &d, Some(resolved_len))),
135 )
136 .collect::<Result<Vec<_>>>()?;
137 Ok(StructArray::try_new(
138 sdt.names().clone(),
139 children,
140 resolved_len,
141 random_validity(u, *n, resolved_len)?,
142 )
143 .vortex_unwrap()
144 .into_array())
145 }
146 DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
147 DType::FixedSizeList(elem_dtype, list_size, null) => {
148 random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
149 }
150 DType::Extension(..) => {
151 todo!("Extension arrays are not implemented")
152 }
153 }
154}
155
156fn random_fixed_size_list(
160 u: &mut Unstructured,
161 elem_dtype: &Arc<DType>,
162 list_size: u32,
163 null: Nullability,
164 chunk_len: Option<usize>,
165) -> Result<ArrayRef> {
166 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
167
168 let mut builder =
169 FixedSizeListBuilder::with_capacity(elem_dtype.clone(), list_size, null, array_length);
170
171 for _ in 0..array_length {
172 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
173 builder.append_null();
174 } else {
175 builder
176 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
177 .vortex_expect("can append value");
178 }
179 }
180
181 Ok(builder.finish())
182}
183
184fn random_list(
188 u: &mut Unstructured,
189 elem_dtype: &Arc<DType>,
190 null: Nullability,
191 chunk_len: Option<usize>,
192) -> Result<ArrayRef> {
193 match u.int_in_range(0..=5)? {
194 0 => random_list_with_offset_type::<i16>(u, elem_dtype, null, chunk_len),
195 1 => random_list_with_offset_type::<i32>(u, elem_dtype, null, chunk_len),
196 2 => random_list_with_offset_type::<i64>(u, elem_dtype, null, chunk_len),
197 3 => random_list_with_offset_type::<u16>(u, elem_dtype, null, chunk_len),
198 4 => random_list_with_offset_type::<u32>(u, elem_dtype, null, chunk_len),
199 5 => random_list_with_offset_type::<u64>(u, elem_dtype, null, chunk_len),
200 _ => unreachable!("int_in_range returns a value in the above range"),
201 }
202}
203
204fn random_list_with_offset_type<O: IntegerPType>(
208 u: &mut Unstructured,
209 elem_dtype: &Arc<DType>,
210 null: Nullability,
211 chunk_len: Option<usize>,
212) -> Result<ArrayRef> {
213 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
214
215 let mut builder = ListViewBuilder::<O, O>::with_capacity(elem_dtype.clone(), null, 20, 10);
216
217 for _ in 0..array_length {
218 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
219 builder.append_null();
220 } else {
221 let list_size = u.int_in_range(0..=20)?;
222 builder
223 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
224 .vortex_expect("can append value");
225 }
226 }
227
228 Ok(builder.finish())
229}
230
231fn random_list_scalar(
233 u: &mut Unstructured,
234 elem_dtype: &Arc<DType>,
235 list_size: u32,
236 null: Nullability,
237) -> Result<Scalar> {
238 let elems = (0..list_size)
239 .map(|_| random_scalar(u, elem_dtype))
240 .collect::<Result<Vec<_>>>()?;
241 Ok(Scalar::list(elem_dtype.clone(), elems, null))
242}
243
244fn random_string(
245 u: &mut Unstructured,
246 nullability: Nullability,
247 len: Option<usize>,
248) -> Result<ArrayRef> {
249 match nullability {
250 Nullability::NonNullable => {
251 let v = arbitrary_vec_of_len::<String>(u, len)?;
252 Ok(match u.int_in_range(0..=1)? {
253 0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
254 1 => VarBinViewArray::from_iter_str(v).into_array(),
255 _ => unreachable!(),
256 })
257 }
258 Nullability::Nullable => {
259 let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
260 Ok(match u.int_in_range(0..=1)? {
261 0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
262 1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
263 _ => unreachable!(),
264 })
265 }
266 }
267}
268
269fn random_bytes(
270 u: &mut Unstructured,
271 nullability: Nullability,
272 len: Option<usize>,
273) -> Result<ArrayRef> {
274 match nullability {
275 Nullability::NonNullable => {
276 let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
277 Ok(match u.int_in_range(0..=1)? {
278 0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
279 1 => VarBinViewArray::from_iter_bin(v).into_array(),
280 _ => unreachable!(),
281 })
282 }
283 Nullability::Nullable => {
284 let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
285 Ok(match u.int_in_range(0..=1)? {
286 0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
287 1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
288 _ => unreachable!(),
289 })
290 }
291 }
292}
293
294fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
295 u: &mut Unstructured<'a>,
296 nullability: Nullability,
297 len: Option<usize>,
298) -> Result<ArrayRef> {
299 let v = arbitrary_vec_of_len::<T>(u, len)?;
300 let validity = random_validity(u, nullability, v.len())?;
301 Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
302}
303
304fn random_bool(
305 u: &mut Unstructured,
306 nullability: Nullability,
307 len: Option<usize>,
308) -> Result<ArrayRef> {
309 let v = arbitrary_vec_of_len(u, len)?;
310 let validity = random_validity(u, nullability, v.len())?;
311 Ok(BoolArray::from_bit_buffer(BitBuffer::from(v), validity).into_array())
312}
313
314fn random_validity(u: &mut Unstructured, nullability: Nullability, len: usize) -> Result<Validity> {
315 match nullability {
316 Nullability::NonNullable => Ok(Validity::NonNullable),
317 Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
318 0 => Validity::AllValid,
319 1 => Validity::AllInvalid,
320 2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
321 _ => unreachable!(),
322 }),
323 }
324}
325
326fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
327 u: &mut Unstructured<'a>,
328 len: Option<usize>,
329) -> Result<Vec<T>> {
330 len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
331 .unwrap_or_else(|| Vec::<T>::arbitrary(u))
332}