1use std::iter;
5use std::sync::Arc;
6
7use arbitrary::Arbitrary;
8use arbitrary::Result;
9use arbitrary::Unstructured;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::Buffer;
12use vortex_error::VortexExpect;
13
14use super::BoolArray;
15use super::ChunkedArray;
16use super::NullArray;
17use super::PrimitiveArray;
18use super::StructArray;
19use crate::Array;
20use crate::ArrayRef;
21use crate::IntoArray;
22use crate::ToCanonical;
23use crate::arrays::VarBinArray;
24use crate::arrays::VarBinViewArray;
25use crate::builders::ArrayBuilder;
26use crate::builders::DecimalBuilder;
27use crate::builders::FixedSizeListBuilder;
28use crate::builders::ListViewBuilder;
29use crate::dtype::DType;
30use crate::dtype::IntegerPType;
31use crate::dtype::NativePType;
32use crate::dtype::Nullability;
33use crate::dtype::PType;
34use crate::match_each_decimal_value_type;
35use crate::scalar::Scalar;
36use crate::scalar::arbitrary::random_scalar;
37use crate::validity::Validity;
38
39#[derive(Clone, Debug)]
41pub struct ArbitraryArray(pub ArrayRef);
42
43impl<'a> Arbitrary<'a> for ArbitraryArray {
44 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
45 let dtype = u.arbitrary()?;
46 Self::arbitrary_with(u, None, &dtype)
47 }
48}
49
50impl ArbitraryArray {
51 pub fn arbitrary_with(u: &mut Unstructured, len: Option<usize>, dtype: &DType) -> Result<Self> {
52 random_array(u, dtype, len).map(ArbitraryArray)
53 }
54}
55
/// Splits `n` into `parts` sizes that sum to `n` and differ by at most one.
///
/// The smaller sizes come first; the last `n % parts` entries are one larger.
///
/// # Panics
///
/// Panics if `parts` is zero (remainder by zero).
fn split_number_into_parts(n: usize, parts: usize) -> Vec<usize> {
    let remainder = n % parts;
    let base = (n - remainder) / parts;

    let mut sizes = Vec::with_capacity(parts);
    sizes.extend(iter::repeat_n(base, parts - remainder));
    sizes.extend(iter::repeat_n(base + 1, remainder));
    sizes
}
63
64fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Result<ArrayRef> {
66 let num_chunks = u.int_in_range(1..=3)?;
67 let chunk_lens = len.map(|l| split_number_into_parts(l, num_chunks));
68 let mut chunks = (0..num_chunks)
69 .map(|i| {
70 let chunk_len = chunk_lens.as_ref().map(|c| c[i]);
71 random_array_chunk(u, dtype, chunk_len)
72 })
73 .collect::<Result<Vec<_>>>()?;
74
75 if chunks.len() == 1 {
76 Ok(chunks.remove(0))
77 } else {
78 let dtype = chunks[0].dtype().clone();
79 Ok(ChunkedArray::try_new(chunks, dtype)
80 .vortex_expect("operation should succeed in arbitrary impl")
81 .into_array())
82 }
83}
84
85fn random_array_chunk(
87 u: &mut Unstructured<'_>,
88 dtype: &DType,
89 chunk_len: Option<usize>,
90) -> Result<ArrayRef> {
91 match dtype {
92 DType::Null => Ok(NullArray::new(
93 chunk_len
94 .map(Ok)
95 .unwrap_or_else(|| u.int_in_range(0..=100))?,
96 )
97 .into_array()),
98 DType::Bool(n) => random_bool(u, *n, chunk_len),
99 DType::Primitive(ptype, n) => match ptype {
100 PType::U8 => random_primitive::<u8>(u, *n, chunk_len),
101 PType::U16 => random_primitive::<u16>(u, *n, chunk_len),
102 PType::U32 => random_primitive::<u32>(u, *n, chunk_len),
103 PType::U64 => random_primitive::<u64>(u, *n, chunk_len),
104 PType::I8 => random_primitive::<i8>(u, *n, chunk_len),
105 PType::I16 => random_primitive::<i16>(u, *n, chunk_len),
106 PType::I32 => random_primitive::<i32>(u, *n, chunk_len),
107 PType::I64 => random_primitive::<i64>(u, *n, chunk_len),
108 PType::F16 => Ok(random_primitive::<u16>(u, *n, chunk_len)?
109 .to_primitive()
110 .reinterpret_cast(PType::F16)
111 .into_array()),
112 PType::F32 => random_primitive::<f32>(u, *n, chunk_len),
113 PType::F64 => random_primitive::<f64>(u, *n, chunk_len),
114 },
115 d @ DType::Decimal(decimal, n) => {
116 let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
117 match_each_decimal_value_type!(DecimalType::smallest_decimal_value_type(decimal), |D| {
118 let mut builder = DecimalBuilder::new::<D>(*decimal, *n);
119 for _i in 0..elem_len {
120 let random_decimal = random_scalar(u, d)?;
121 builder.append_scalar(&random_decimal).vortex_expect(
122 "was somehow unable to append a decimal to a decimal builder",
123 );
124 }
125 Ok(builder.finish())
126 })
127 }
128 DType::Utf8(n) => random_string(u, *n, chunk_len),
129 DType::Binary(n) => random_bytes(u, *n, chunk_len),
130 DType::Struct(sdt, n) => {
131 let first_array = sdt
132 .fields()
133 .next()
134 .map(|d| random_array(u, &d, chunk_len))
135 .transpose()?;
136 let resolved_len = first_array
137 .as_ref()
138 .map(|a| a.len())
139 .or(chunk_len)
140 .map(Ok)
141 .unwrap_or_else(|| u.int_in_range(0..=100))?;
142 let children = first_array
143 .into_iter()
144 .map(Ok)
145 .chain(
146 sdt.fields()
147 .skip(1)
148 .map(|d| random_array(u, &d, Some(resolved_len))),
149 )
150 .collect::<Result<Vec<_>>>()?;
151 Ok(StructArray::try_new(
152 sdt.names().clone(),
153 children,
154 resolved_len,
155 random_validity(u, *n, resolved_len)?,
156 )
157 .vortex_expect("operation should succeed in arbitrary impl")
158 .into_array())
159 }
160 DType::List(elem_dtype, null) => random_list(u, elem_dtype, *null, chunk_len),
161 DType::FixedSizeList(elem_dtype, list_size, null) => {
162 random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len)
163 }
164 DType::Extension(..) => {
165 todo!("Extension arrays are not implemented")
166 }
167 }
168}
169
170fn random_fixed_size_list(
174 u: &mut Unstructured,
175 elem_dtype: &Arc<DType>,
176 list_size: u32,
177 null: Nullability,
178 chunk_len: Option<usize>,
179) -> Result<ArrayRef> {
180 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
181
182 let mut builder =
183 FixedSizeListBuilder::with_capacity(elem_dtype.clone(), list_size, null, array_length);
184
185 for _ in 0..array_length {
186 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
187 builder.append_null();
188 } else {
189 builder
190 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
191 .vortex_expect("can append value");
192 }
193 }
194
195 Ok(builder.finish())
196}
197
198fn random_list(
202 u: &mut Unstructured,
203 elem_dtype: &Arc<DType>,
204 null: Nullability,
205 chunk_len: Option<usize>,
206) -> Result<ArrayRef> {
207 match u.int_in_range(0..=5)? {
208 0 => random_list_with_offset_type::<i16>(u, elem_dtype, null, chunk_len),
209 1 => random_list_with_offset_type::<i32>(u, elem_dtype, null, chunk_len),
210 2 => random_list_with_offset_type::<i64>(u, elem_dtype, null, chunk_len),
211 3 => random_list_with_offset_type::<u16>(u, elem_dtype, null, chunk_len),
212 4 => random_list_with_offset_type::<u32>(u, elem_dtype, null, chunk_len),
213 5 => random_list_with_offset_type::<u64>(u, elem_dtype, null, chunk_len),
214 _ => unreachable!("int_in_range returns a value in the above range"),
215 }
216}
217
218fn random_list_with_offset_type<O: IntegerPType>(
222 u: &mut Unstructured,
223 elem_dtype: &Arc<DType>,
224 null: Nullability,
225 chunk_len: Option<usize>,
226) -> Result<ArrayRef> {
227 let array_length = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
228
229 let mut builder = ListViewBuilder::<O, O>::with_capacity(elem_dtype.clone(), null, 20, 10);
230
231 for _ in 0..array_length {
232 if null == Nullability::Nullable && u.arbitrary::<bool>()? {
233 builder.append_null();
234 } else {
235 let list_size = u.int_in_range(0..=20)?;
236 builder
237 .append_value(random_list_scalar(u, elem_dtype, list_size, null)?.as_list())
238 .vortex_expect("can append value");
239 }
240 }
241
242 Ok(builder.finish())
243}
244
245fn random_list_scalar(
247 u: &mut Unstructured,
248 elem_dtype: &Arc<DType>,
249 list_size: u32,
250 null: Nullability,
251) -> Result<Scalar> {
252 let elems = (0..list_size)
253 .map(|_| random_scalar(u, elem_dtype))
254 .collect::<Result<Vec<_>>>()?;
255 Ok(Scalar::list(elem_dtype.clone(), elems, null))
256}
257
258fn random_string(
259 u: &mut Unstructured,
260 nullability: Nullability,
261 len: Option<usize>,
262) -> Result<ArrayRef> {
263 match nullability {
264 Nullability::NonNullable => {
265 let v = arbitrary_vec_of_len::<String>(u, len)?;
266 Ok(match u.int_in_range(0..=1)? {
267 0 => VarBinArray::from_vec(v, DType::Utf8(Nullability::NonNullable)).into_array(),
268 1 => VarBinViewArray::from_iter_str(v).into_array(),
269 _ => unreachable!(),
270 })
271 }
272 Nullability::Nullable => {
273 let v = arbitrary_vec_of_len::<Option<String>>(u, len)?;
274 Ok(match u.int_in_range(0..=1)? {
275 0 => VarBinArray::from_iter(v, DType::Utf8(Nullability::Nullable)).into_array(),
276 1 => VarBinViewArray::from_iter_nullable_str(v).into_array(),
277 _ => unreachable!(),
278 })
279 }
280 }
281}
282
283fn random_bytes(
284 u: &mut Unstructured,
285 nullability: Nullability,
286 len: Option<usize>,
287) -> Result<ArrayRef> {
288 match nullability {
289 Nullability::NonNullable => {
290 let v = arbitrary_vec_of_len::<Vec<u8>>(u, len)?;
291 Ok(match u.int_in_range(0..=1)? {
292 0 => VarBinArray::from_vec(v, DType::Binary(Nullability::NonNullable)).into_array(),
293 1 => VarBinViewArray::from_iter_bin(v).into_array(),
294 _ => unreachable!(),
295 })
296 }
297 Nullability::Nullable => {
298 let v = arbitrary_vec_of_len::<Option<Vec<u8>>>(u, len)?;
299 Ok(match u.int_in_range(0..=1)? {
300 0 => VarBinArray::from_iter(v, DType::Binary(Nullability::Nullable)).into_array(),
301 1 => VarBinViewArray::from_iter_nullable_bin(v).into_array(),
302 _ => unreachable!(),
303 })
304 }
305 }
306}
307
308fn random_primitive<'a, T: Arbitrary<'a> + NativePType>(
309 u: &mut Unstructured<'a>,
310 nullability: Nullability,
311 len: Option<usize>,
312) -> Result<ArrayRef> {
313 let v = arbitrary_vec_of_len::<T>(u, len)?;
314 let validity = random_validity(u, nullability, v.len())?;
315 Ok(PrimitiveArray::new(Buffer::copy_from(v), validity).into_array())
316}
317
318fn random_bool(
319 u: &mut Unstructured,
320 nullability: Nullability,
321 len: Option<usize>,
322) -> Result<ArrayRef> {
323 let v = arbitrary_vec_of_len(u, len)?;
324 let validity = random_validity(u, nullability, v.len())?;
325 Ok(BoolArray::new(BitBuffer::from(v), validity).into_array())
326}
327
328pub fn random_validity(
329 u: &mut Unstructured,
330 nullability: Nullability,
331 len: usize,
332) -> Result<Validity> {
333 match nullability {
334 Nullability::NonNullable => Ok(Validity::NonNullable),
335 Nullability::Nullable => Ok(match u.int_in_range(0..=2)? {
336 0 => Validity::AllValid,
337 1 => Validity::AllInvalid,
338 2 => Validity::from_iter(arbitrary_vec_of_len::<bool>(u, Some(len))?),
339 _ => unreachable!(),
340 }),
341 }
342}
343
344fn arbitrary_vec_of_len<'a, T: Arbitrary<'a>>(
345 u: &mut Unstructured<'a>,
346 len: Option<usize>,
347) -> Result<Vec<T>> {
348 len.map(|l| (0..l).map(|_| T::arbitrary(u)).collect::<Result<Vec<_>>>())
349 .unwrap_or_else(|| Vec::<T>::arbitrary(u))
350}