1use std::iter;
5use std::ops::RangeInclusive;
6use std::sync::Arc;
7
8use arbitrary::Arbitrary;
9use arbitrary::Error::IncorrectFormat;
10use arbitrary::Result;
11use arbitrary::Unstructured;
12use vortex_buffer::BitBuffer;
13use vortex_buffer::Buffer;
14use vortex_error::VortexExpect;
15
16use crate::ArrayRef;
17use crate::IntoArray;
18#[expect(deprecated)]
19use crate::ToCanonical as _;
20use crate::arrays::BoolArray;
21use crate::arrays::ChunkedArray;
22use crate::arrays::NullArray;
23use crate::arrays::PrimitiveArray;
24use crate::arrays::StructArray;
25use crate::arrays::VarBinArray;
26use crate::arrays::VarBinViewArray;
27use crate::arrays::primitive::PrimitiveArrayExt;
28use crate::builders::ArrayBuilder;
29use crate::builders::DecimalBuilder;
30use crate::builders::FixedSizeListBuilder;
31use crate::builders::ListViewBuilder;
32use crate::dtype::DType;
33use crate::dtype::IntegerPType;
34use crate::dtype::NativePType;
35use crate::dtype::Nullability;
36use crate::dtype::PType;
37use crate::match_each_decimal_value_type;
38use crate::scalar::Scalar;
39use crate::scalar::arbitrary::random_scalar;
40use crate::validity::Validity;
41
42#[derive(Clone, Debug)]
44pub struct ArbitraryArray(pub ArrayRef);
45
46pub trait ArbitraryWith<'a, C>: Sized {
48 fn arbitrary_with_config(u: &mut Unstructured<'a>, config: &C) -> Result<Self>;
50}
51
52#[derive(Clone, Debug)]
54pub struct ArbitraryArrayConfig {
55 pub dtype: Option<DType>,
57 pub len: RangeInclusive<usize>,
59}
60
61impl<'a> ArbitraryWith<'a, ArbitraryArrayConfig> for ArbitraryArray {
62 fn arbitrary_with_config(
63 u: &mut Unstructured<'a>,
64 config: &ArbitraryArrayConfig,
65 ) -> Result<Self> {
66 if config.len.is_empty() {
67 return Err(IncorrectFormat);
68 }
69
70 let dtype = match &config.dtype {
71 Some(dtype) => dtype.clone(),
72 None => u.arbitrary()?,
73 };
74 let len = u.int_in_range(config.len.clone())?;
75
76 random_array(u, &dtype, Some(len)).map(ArbitraryArray)
77 }
78}
79
/// Splits `n` into `parts` non-negative sizes that sum to `n` and differ by at
/// most one.
///
/// The smaller sizes come first: the leading `parts - n % parts` entries are
/// `n / parts`, the trailing `n % parts` entries are one larger.
///
/// # Panics
///
/// Panics when `parts` is zero (remainder by zero).
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    let remainder = n % parts;
    let base = n / parts;

    let mut sizes = vec![base; parts];
    // Only the trailing `remainder` entries are bumped, so `base + 1` is never
    // computed when the split is exact. This avoids an overflow for inputs
    // such as `(usize::MAX, 1)`.
    for size in sizes.iter_mut().skip(parts - remainder) {
        *size += 1;
    }
    sizes
}
87
88fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
90 let num_chunks = u.int_in_range(1..=3)?;
91 let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
92 let mut chunks = (0..num_chunks)
93 .map(|i| {
94 let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
95 random_array_chunk(u, dtype, chunk_len)
96 })
97 .collect::<Result<Vec<_>>>()?;
98
99 if chunks.len() == 1 {
100 Ok(chunks.remove(0))
101 } else {
102 let dtype = chunks[0].dtype().clone();
103 Ok(ChunkedArray::try_new(chunks, dtype)
104 .vortex_expect("operation should succeed in arbitrary impl")
105 .into_array())
106 }
107}
108
109fn random_array_chunk(
111 u: &mut Unstructured<'_>,
112 dtype: &DType,
113 chunk_len: Option<usize>,
114) -> Result<ArrayRef> {
115 match dtype {
116 DType::Null => Ok(NullArray::new(
117 chunk_len
118 .map(Ok)
119 .unwrap_or_else(|| u.int_in_range(0..=100))?,
120 )
121 .into_array()),
122 DType::Bool(n) => random_bool(u, *n, chunk_len),
123 DType::Primitive(ptype, n) => match ptype {
124 PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
125 PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
126 PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
127 PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
128 PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
129 PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
130 PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
131 PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
132 PType::F16 => {
133 #[expect(deprecated)]
134 let prim = random_primitive::<u16>(u, *n, chunk_len)?
135 .to_primitive()
136 .reinterpret_cast(PType::F16)
137 .into_array();
138 Ok(prim)
139 }
140 PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
141 PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
142 },
143 d @ DType::Decimal(decimal, n) => {
144 let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
145 match_each_decimal_value_type!(DecimalType::smallest_decimal_value_type(decimal), |D| {
146 let mut builder = DecimalBuilder::new::<D>(*decimal, *n);
147 for _i in 0..elem_len {
148 let random_decimal = random_scalar(u, d)?;
149 builder.append_scalar(&random_decimal).vortex_expect(
150 "was somehow unable to append a decimal to a decimal builder",
151 );
152 }
153 Ok(builder.finish())
154 })
155 }
156 DType::Utf8(n) => random_string(u, *n, chunk_len),
157 DType::Binary(n) => random_bytes(u, *n, chunk_len),
158 DType::Struct(sdt, n) => {
159 let first_array = sdt
160 .fields()
161 .next()
162 .map(|d| random_array(u, &d, chunk_len))
163 .transpose()?;
164 let resolved_len = first_array
165 .as_ref()
166 .map(|a| a.len())
167 .or(chunk_len)
168 .map(Ok)
169 .unwrap_or_else(|| u.int_in_range(0..=100))?;
170 let children = first_array
171 .into_iter()
172 .map(Ok)
173 .chain(
174 sdt.fields()
175 .skip(1)
176 .map(|d| random_array(u, &d, Some(resolved_len))),
177 )
178 .collect::<Result<Vec<_>>>()?;
179 Ok(StructArray::try_new(
180 sdt.names().clone(),
181 children,
182 resolved_len,
183 random_validity(u, *n, resolved_len)?,
184 )
185 .vortex_expect("operation should succeed in arbitrary impl")
186 .into_array())
187 }
188 DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
189 DType::FixedSizeList(elem_dtype, list_size, null) => {
190 random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
191 }
192 DType::Extension(..) => {
193 unimplemented!("Extension arrays are not implemented")
194 }
195 DType::Variant(_) => {
196 unimplemented!("Variant arrays are not implemented")
197 }
198 }
199}
200
201fn random_fixed_size_list(
205 u: &mut Unstructured,
206 elem_dtype: &Arc<DType>,
207 list_size: u32,
208 null: Nullability,
209 chunk_len: Option<usize>,
210) -> Result<ArrayRef> {
211 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
212
213 let mut builder =
214 FixedSizeListBuilder::with_capacity(Arc::clone(elem_dtype), list_size, null, array_length);
215
216 for _ in 0..array_length {
217 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
218 builder.append_null();
219 } else {
220 builder
221 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
222 .vortex_expect("can append value");
223 }
224 }
225
226 Ok(builder.finish())
227}
228
229fn random_list(
233 u: &mut Unstructured,
234 elem_dtype: &Arc<DType>,
235 null: Nullability,
236 chunk_len: Option<usize>,
237) -> Result<ArrayRef> {
238 match u.int_in_range(0..=5)? {
239 0 => random_list_with_offset_type::<i16>(u, elem_dtype, null, chunk_len),
240 1 => random_list_with_offset_type::<i32>(u, elem_dtype, null, chunk_len),
241 2 => random_list_with_offset_type::<i64>(u, elem_dtype, null, chunk_len),
242 3 => random_list_with_offset_type::<u16>(u, elem_dtype, null, chunk_len),
243 4 => random_list_with_offset_type::<u32>(u, elem_dtype, null, chunk_len),
244 5 => random_list_with_offset_type::<u64>(u, elem_dtype, null, chunk_len),
245 _ => unreachable!("int_in_range returns a value in the above range"),
246 }
247}
248
249fn random_list_with_offset_type<O: IntegerPType>(
253 u: &mut Unstructured,
254 elem_dtype: &Arc<DType>,
255 null: Nullability,
256 chunk_len: Option<usize>,
257) -> Result<ArrayRef> {
258 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
259
260 let mut builder = ListViewBuilder::<O, O>::with_capacity(Arc::clone(elem_dtype), null, 20, 10);
261
262 for _ in 0..array_length {
263 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
264 builder.append_null();
265 } else {
266 let list_size = u.int_in_range(0..=20)?;
267 builder
268 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
269 .vortex_expect("can append value");
270 }
271 }
272
273 Ok(builder.finish())
274}
275
276fn random_list_scalar(
278 u: &mut Unstructured,
279 elem_dtype: &Arc<DType>,
280 list_size: u32,
281 null: Nullability,
282) -> Result<Scalar> {
283 let elems = (0..list_size)
284 .map(|_| random_scalar(u, elem_dtype))
285 .collect::<Result<Vec<_>>>()?;
286 Ok(Scalar::list(Arc::clone(elem_dtype), elems, null))
287}
288
289fn random_string(
290 u: &mut Unstructured,
291 nullability: Nullability,
292 len: Option<usize>,
293) -> Result<ArrayRef> {
294 match nullability {
295 Nullability::NonNullable => {
296 let v = arbitrary_vec_of_len::<String>(u, len)?;
297 Ok(match u.int_in_range(0..=1)? {
298 0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
299 1 => VarBinViewArray::from_iter_str(v).into_array(),
300 _ => unreachable!(),
301 })
302 }
303 Nullability::Nullable => {
304 let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
305 Ok(match u.int_in_range(0..=1)? {
306 0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
307 1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
308 _ => unreachable!(),
309 })
310 }
311 }
312}
313
314fn random_bytes(
315 u: &mut Unstructured,
316 nullability: Nullability,
317 len: Option<usize>,
318) -> Result<ArrayRef> {
319 match nullability {
320 Nullability::NonNullable => {
321 let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
322 Ok(match u.int_in_range(0..=1)? {
323 0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
324 1 => VarBinViewArray::from_iter_bin(v).into_array(),
325 _ => unreachable!(),
326 })
327 }
328 Nullability::Nullable => {
329 let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
330 Ok(match u.int_in_range(0..=1)? {
331 0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
332 1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
333 _ => unreachable!(),
334 })
335 }
336 }
337}
338
339fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
340 u: &mut Unstructured<'a>,
341 nullability: Nullability,
342 len: Option<usize>,
343) -> Result<ArrayRef> {
344 let v = arbitrary_vec_of_len::<T>(u, len)?;
345 let validity = random_validity(u, nullability, v.len())?;
346 Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
347}
348
349fn random_bool(
350 u: &mut Unstructured,
351 nullability: Nullability,
352 len: Option<usize>,
353) -> Result<ArrayRef> {
354 let v = arbitrary_vec_of_len(u, len)?;
355 let validity = random_validity(u, nullability, v.len())?;
356 Ok(BoolArray::new(BitBuffer::from(v), validity).into_array())
357}
358
359pub fn random_validity(
360 u: &mut Unstructured,
361 nullability: Nullability,
362 len: usize,
363) -> Result<Validity> {
364 match nullability {
365 Nullability::NonNullable => Ok(Validity::NonNullable),
366 Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
367 0 => Validity::AllValid,
368 1 => Validity::AllInvalid,
369 2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
370 _ => unreachable!(),
371 }),
372 }
373}
374
375fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
376 u: &mut Unstructured<'a>,
377 len: Option<usize>,
378) -> Result<Vec<T>> {
379 len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
380 .unwrap_or_else(|| Vec::<T>::arbitrary(u))
381}