1use std::iter;
5use std::sync::Arc;
6
7use arbitrary::Arbitrary;
8use arbitrary::Result;
9use arbitrary::Unstructured;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::Buffer;
12use vortex_dtype::DType;
13use vortex_dtype::IntegerPType;
14use vortex_dtype::NativePType;
15use vortex_dtype::Nullability;
16use vortex_dtype::PType;
17use vortex_dtype::match_each_decimal_value_type;
18use vortex_error::VortexExpect;
19use vortex_scalar::Scalar;
20use vortex_scalar::arbitrary::random_scalar;
21
22use super::BoolArray;
23use super::ChunkedArray;
24use super::NullArray;
25use super::PrimitiveArray;
26use super::StructArray;
27use crate::Array;
28use crate::ArrayRef;
29use crate::IntoArray;
30use crate::ToCanonical;
31use crate::arrays::VarBinArray;
32use crate::arrays::VarBinViewArray;
33use crate::builders::ArrayBuilder;
34use crate::builders::DecimalBuilder;
35use crate::builders::FixedSizeListBuilder;
36use crate::builders::ListViewBuilder;
37use crate::validity::Validity;
38
/// Newtype around an [`ArrayRef`] that carries this file's [`Arbitrary`] implementation,
/// so randomly generated arrays can be requested from an [`Unstructured`] input.
///
/// The wrapped array is public and can be taken out with `.0`.
#[derive(Clone, Debug)]
pub struct ArbitraryArray(pub ArrayRef);
42
43impl<'a> Arbitrary<'a> for ArbitraryArray {
44 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
45 let dtype = u.arbitrary()?;
46 Self::arbitrary_with(u, None, &dtype)
47 }
48}
49
50impl ArbitraryArray {
51 pub fn arbitrary_with(u: &mut Unstructured, len: Option<usize>, dtype: &DType) -> Result<Self> {
52 random_array(u, dtype, len).map(ArbitraryArray)
53 }
54}
55
/// Splits `n` into `parts` sizes that sum to `n` and differ from each other by at most one.
///
/// The smaller sizes come first: the leading `parts - n % parts` entries equal `n / parts`
/// and the trailing `n % parts` entries are one larger.
///
/// # Panics
/// Panics if `parts` is zero.
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    let extra = n % parts;
    let base = n / parts;

    let mut sizes = Vec::with_capacity(parts);
    sizes.extend(iter::repeat_n(base, parts - extra));
    sizes.extend(iter::repeat_n(base + 1, extra));
    sizes
}
63
64fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
66 let num_chunks = u.int_in_range(1..=3)?;
67 let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
68 let mut chunks = (0..num_chunks)
69 .map(|i| {
70 let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
71 random_array_chunk(u, dtype, chunk_len)
72 })
73 .collect::<Result<Vec<_>>>()?;
74
75 if chunks.len() == 1 {
76 Ok(chunks.remove(0))
77 } else {
78 let dtype = chunks[0].dtype().clone();
79 Ok(ChunkedArray::try_new(chunks, dtype)
80 .vortex_expect("operation should succeed in arbitrary impl")
81 .into_array())
82 }
83}
84
85fn random_array_chunk(
87 u: &mut Unstructured<'_>,
88 dtype: &DType,
89 chunk_len: Option<usize>,
90) -> Result<ArrayRef> {
91 match dtype {
92 DType::Null => Ok(NullArray::new(
93 chunk_len
94 .map(Ok)
95 .unwrap_or_else(|| u.int_in_range(0..=100))?,
96 )
97 .into_array()),
98 DType::Bool(n) => random_bool(u, *n, chunk_len),
99 DType::Primitive(ptype, n) => match ptype {
100 PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
101 PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
102 PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
103 PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
104 PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
105 PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
106 PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
107 PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
108 PType::F16 => Ok(random_primitive::<u16>(u, *n, chunk_len)?
109 .to_primitive()
110 .reinterpret_cast(PType::F16)
111 .into_array()),
112 PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
113 PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
114 },
115 d @ DType::Decimal(decimal, n) => {
116 let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
117 match_each_decimal_value_type!(
118 DecimalType::smallest_decimal_value_type(decimal),
119 |DVT| {
120 let mut builder = DecimalBuilder::new::<DVT>(*decimal, *n);
121 for _i in 0..elem_len {
122 let random_decimal = random_scalar(u, d)?;
123 builder.append_scalar(&random_decimal).vortex_expect(
124 "was somehow unable to append a decimal to a decimal builder",
125 );
126 }
127 Ok(builder.finish())
128 }
129 )
130 }
131 DType::Utf8(n) => random_string(u, *n, chunk_len),
132 DType::Binary(n) => random_bytes(u, *n, chunk_len),
133 DType::Struct(sdt, n) => {
134 let first_array = sdt
135 .fields()
136 .next()
137 .map(|d| random_array(u, &d, chunk_len))
138 .transpose()?;
139 let resolved_len = first_array
140 .as_ref()
141 .map(|a| a.len())
142 .or(chunk_len)
143 .map(Ok)
144 .unwrap_or_else(|| u.int_in_range(0..=100))?;
145 let children = first_array
146 .into_iter()
147 .map(Ok)
148 .chain(
149 sdt.fields()
150 .skip(1)
151 .map(|d| random_array(u, &d, Some(resolved_len))),
152 )
153 .collect::<Result<Vec<_>>>()?;
154 Ok(StructArray::try_new(
155 sdt.names().clone(),
156 children,
157 resolved_len,
158 random_validity(u, *n, resolved_len)?,
159 )
160 .vortex_expect("operation should succeed in arbitrary impl")
161 .into_array())
162 }
163 DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
164 DType::FixedSizeList(elem_dtype, list_size, null) => {
165 random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
166 }
167 DType::Extension(..) => {
168 todo!("Extension arrays are not implemented")
169 }
170 }
171}
172
173fn random_fixed_size_list(
177 u: &mut Unstructured,
178 elem_dtype: &Arc<DType>,
179 list_size: u32,
180 null: Nullability,
181 chunk_len: Option<usize>,
182) -> Result<ArrayRef> {
183 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
184
185 let mut builder =
186 FixedSizeListBuilder::with_capacity(elem_dtype.clone(), list_size, null, array_length);
187
188 for _ in 0..array_length {
189 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
190 builder.append_null();
191 } else {
192 builder
193 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
194 .vortex_expect("can append value");
195 }
196 }
197
198 Ok(builder.finish())
199}
200
201fn random_list(
205 u: &mut Unstructured,
206 elem_dtype: &Arc<DType>,
207 null: Nullability,
208 chunk_len: Option<usize>,
209) -> Result<ArrayRef> {
210 match u.int_in_range(0..=5)? {
211 0 => random_list_with_offset_type::<i16>(u, elem_dtype, null, chunk_len),
212 1 => random_list_with_offset_type::<i32>(u, elem_dtype, null, chunk_len),
213 2 => random_list_with_offset_type::<i64>(u, elem_dtype, null, chunk_len),
214 3 => random_list_with_offset_type::<u16>(u, elem_dtype, null, chunk_len),
215 4 => random_list_with_offset_type::<u32>(u, elem_dtype, null, chunk_len),
216 5 => random_list_with_offset_type::<u64>(u, elem_dtype, null, chunk_len),
217 _ => unreachable!("int_in_range returns a value in the above range"),
218 }
219}
220
221fn random_list_with_offset_type<O: IntegerPType>(
225 u: &mut Unstructured,
226 elem_dtype: &Arc<DType>,
227 null: Nullability,
228 chunk_len: Option<usize>,
229) -> Result<ArrayRef> {
230 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
231
232 let mut builder = ListViewBuilder::<O, O>::with_capacity(elem_dtype.clone(), null, 20, 10);
233
234 for _ in 0..array_length {
235 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
236 builder.append_null();
237 } else {
238 let list_size = u.int_in_range(0..=20)?;
239 builder
240 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
241 .vortex_expect("can append value");
242 }
243 }
244
245 Ok(builder.finish())
246}
247
248fn random_list_scalar(
250 u: &mut Unstructured,
251 elem_dtype: &Arc<DType>,
252 list_size: u32,
253 null: Nullability,
254) -> Result<Scalar> {
255 let elems = (0..list_size)
256 .map(|_| random_scalar(u, elem_dtype))
257 .collect::<Result<Vec<_>>>()?;
258 Ok(Scalar::list(elem_dtype.clone(), elems, null))
259}
260
261fn random_string(
262 u: &mut Unstructured,
263 nullability: Nullability,
264 len: Option<usize>,
265) -> Result<ArrayRef> {
266 match nullability {
267 Nullability::NonNullable => {
268 let v = arbitrary_vec_of_len::<String>(u, len)?;
269 Ok(match u.int_in_range(0..=1)? {
270 0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
271 1 => VarBinViewArray::from_iter_str(v).into_array(),
272 _ => unreachable!(),
273 })
274 }
275 Nullability::Nullable => {
276 let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
277 Ok(match u.int_in_range(0..=1)? {
278 0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
279 1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
280 _ => unreachable!(),
281 })
282 }
283 }
284}
285
286fn random_bytes(
287 u: &mut Unstructured,
288 nullability: Nullability,
289 len: Option<usize>,
290) -> Result<ArrayRef> {
291 match nullability {
292 Nullability::NonNullable => {
293 let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
294 Ok(match u.int_in_range(0..=1)? {
295 0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
296 1 => VarBinViewArray::from_iter_bin(v).into_array(),
297 _ => unreachable!(),
298 })
299 }
300 Nullability::Nullable => {
301 let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
302 Ok(match u.int_in_range(0..=1)? {
303 0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
304 1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
305 _ => unreachable!(),
306 })
307 }
308 }
309}
310
311fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
312 u: &mut Unstructured<'a>,
313 nullability: Nullability,
314 len: Option<usize>,
315) -> Result<ArrayRef> {
316 let v = arbitrary_vec_of_len::<T>(u, len)?;
317 let validity = random_validity(u, nullability, v.len())?;
318 Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
319}
320
321fn random_bool(
322 u: &mut Unstructured,
323 nullability: Nullability,
324 len: Option<usize>,
325) -> Result<ArrayRef> {
326 let v = arbitrary_vec_of_len(u, len)?;
327 let validity = random_validity(u, nullability, v.len())?;
328 Ok(BoolArray::from_bit_buffer(BitBuffer::from(v), validity).into_array())
329}
330
331fn random_validity(u: &mut Unstructured, nullability: Nullability, len: usize) -> Result<Validity> {
332 match nullability {
333 Nullability::NonNullable => Ok(Validity::NonNullable),
334 Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
335 0 => Validity::AllValid,
336 1 => Validity::AllInvalid,
337 2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
338 _ => unreachable!(),
339 }),
340 }
341}
342
343fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
344 u: &mut Unstructured<'a>,
345 len: Option<usize>,
346) -> Result<Vec<T>> {
347 len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
348 .unwrap_or_else(|| Vec::<T>::arbitrary(u))
349}