1use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
/// Computes `length` for a run-end encoded array whose run ends have type `$run_type`.
///
/// The kernel is applied once to the run *values*; the original run ends are reused unchanged,
/// so the result has the same runs as the input with each value replaced by its length.
///
/// `$k` and `$v` are the run-ends and values fields of the input's `DataType::RunEndEncoded`.
/// The output keeps the run-ends field as-is and keeps the values field's name, nullability and
/// metadata, but its data type is replaced by the data type of the computed lengths so that the
/// declared type of the result matches the child array it actually contains.
///
/// Must be expanded inside a function returning `Result<ArrayRef, ArrowError>` (it uses `?`).
macro_rules! ree_length {
    ($array:expr, $run_type:ty, $k:expr, $v:expr) => {{
        let ree = $array
            .as_run_opt::<$run_type>()
            .expect("data type was matched as RunEndEncoded with this run-end type");
        let inner_value_lengths = length(ree.values().as_ref())?;
        // The values child now holds lengths, not the original values, so the values field of
        // the output data type has to describe the lengths array.
        let lengths_field = Arc::new(
            arrow_schema::Field::clone($v).with_data_type(inner_value_lengths.data_type().clone()),
        );
        // SAFETY: the run ends are taken unchanged from an existing `RunArray`, `length` yields
        // exactly one output element per input value, and the values field above is built from
        // the data type of `inner_value_lengths`.
        let out_ree = unsafe {
            RunArray::<$run_type>::new_unchecked(
                DataType::RunEndEncoded(Arc::clone($k), lengths_field),
                ree.run_ends().clone(),
                inner_value_lengths,
            )
        };
        Ok(Arc::new(out_ree) as ArrayRef)
    }};
}
39
40fn length_impl<P: ArrowPrimitiveType>(
41 offsets: &OffsetBuffer<P::Native>,
42 nulls: Option<&NullBuffer>,
43) -> ArrayRef {
44 let v: Vec<_> = offsets
45 .windows(2)
46 .map(|w| w[1].sub_wrapping(w[0]))
47 .collect();
48 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
49}
50
51fn bit_length_impl<P: ArrowPrimitiveType>(
52 offsets: &OffsetBuffer<P::Native>,
53 nulls: Option<&NullBuffer>,
54) -> ArrayRef {
55 let bits = P::Native::usize_as(8);
56 let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
57 let v: Vec<_> = offsets.windows(2).map(c).collect();
58 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
59}
60
61pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
71 if let Some(d) = array.as_any_dictionary_opt() {
72 let lengths = length(d.values().as_ref())?;
73 return Ok(d.with_values(lengths));
74 }
75 match array.data_type() {
76 DataType::List(_) => {
77 let list = array.as_list::<i32>();
78 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
79 }
80 DataType::LargeList(_) => {
81 let list = array.as_list::<i64>();
82 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
83 }
84 DataType::ListView(_) => {
85 let list = array.as_list_view::<i32>();
86 Ok(Arc::new(Int32Array::new(
87 list.sizes().clone(),
88 list.nulls().cloned(),
89 )))
90 }
91 DataType::LargeListView(_) => {
92 let list = array.as_list_view::<i64>();
93 Ok(Arc::new(Int64Array::new(
94 list.sizes().clone(),
95 list.nulls().cloned(),
96 )))
97 }
98 DataType::Utf8 => {
99 let list = array.as_string::<i32>();
100 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
101 }
102 DataType::LargeUtf8 => {
103 let list = array.as_string::<i64>();
104 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
105 }
106 DataType::Utf8View => {
107 let list = array.as_string_view();
108 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
109 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
110 v.into(),
111 list.nulls().cloned(),
112 )?))
113 }
114 DataType::Binary => {
115 let list = array.as_binary::<i32>();
116 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
117 }
118 DataType::LargeBinary => {
119 let list = array.as_binary::<i64>();
120 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
121 }
122 DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
123 Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
124 )),
125 DataType::BinaryView => {
126 let list = array.as_binary_view();
127 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
128 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
129 v.into(),
130 list.nulls().cloned(),
131 )?))
132 }
133 DataType::RunEndEncoded(k, v) => match k.data_type() {
134 DataType::Int16 => ree_length!(array, Int16Type, &k, &v),
135 DataType::Int32 => ree_length!(array, Int32Type, &k, &v),
136 DataType::Int64 => ree_length!(array, Int64Type, &k, &v),
137 _ => Err(ArrowError::InvalidArgumentError(format!(
138 "Invalid run-end type: {:?}",
139 k.data_type()
140 ))),
141 },
142 other => Err(ArrowError::ComputeError(format!(
143 "length not supported for {other:?}"
144 ))),
145 }
146}
147
148pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
156 if let Some(d) = array.as_any_dictionary_opt() {
157 let lengths = bit_length(d.values().as_ref())?;
158 return Ok(d.with_values(lengths));
159 }
160
161 match array.data_type() {
162 DataType::Utf8 => {
163 let list = array.as_string::<i32>();
164 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
165 }
166 DataType::LargeUtf8 => {
167 let list = array.as_string::<i64>();
168 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
169 }
170 DataType::Utf8View => {
171 let list = array.as_string_view();
172 let values = list
173 .views()
174 .iter()
175 .map(|view| (*view as i32).wrapping_mul(8))
176 .collect();
177 Ok(Arc::new(Int32Array::try_new(
178 values,
179 array.nulls().cloned(),
180 )?))
181 }
182 DataType::Binary => {
183 let list = array.as_binary::<i32>();
184 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
185 }
186 DataType::LargeBinary => {
187 let list = array.as_binary::<i64>();
188 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
189 }
190 DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
191 vec![*len * 8; array.len()].into(),
192 array.nulls().cloned(),
193 )?)),
194 DataType::BinaryView => {
195 let list = array.as_binary_view();
196 let values = list
197 .views()
198 .iter()
199 .map(|view| (*view as i32).wrapping_mul(8))
200 .collect();
201 Ok(Arc::new(Int32Array::try_new(
202 values,
203 array.nulls().cloned(),
204 )?))
205 }
206 other => Err(ArrowError::ComputeError(format!(
207 "bit_length not supported for {other:?}"
208 ))),
209 }
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::{Buffer, ScalarBuffer};
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    /// Shared `(input, expected element count, expected byte lengths)` cases for the string
    /// variants of `length`.
    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // A long input: 4096 elements cycling through five strings of different sizes.
        let values = [
            "one",
            "on",
            "o",
            "",
            "this is a longer string to test string array with",
        ];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
            (vec!["💖"], 1, vec![4]),
            (values, 4096, expected),
        ]
    }

    /// Builds a `GenericBinaryArray<$offset_ty>` from `$value`, runs `$kernel` on it and
    /// compares the result (downcast to `$result_ty`) with `$expected`.
    macro_rules! length_binary_helper {
        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
            let array = GenericBinaryArray::<$offset_ty>::from($value);
            let result = $kernel(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    /// Builds a `GenericListArray<$offset_ty>` of `$element_ty` primitives from `$value`, runs
    /// `length` on it and compares the result (downcast to `$result_ty`) with `$expected`.
    macro_rules! length_list_helper {
        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
            let array =
                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
            let result = length(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    #[test]
    fn length_test_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_large_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_string_view() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringViewArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
        let result: Vec<i32> = vec![4, 3, 2];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_test_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
        let result: Vec<i64> = vec![4, 2, 3];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_test_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![4, 2, 3, 49];

        let array = BinaryViewArray::from(value);
        let result = length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1), Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<i32> = vec![0, 3, 1];
        length_list_helper!(i32, Int32Array, Int32Type, value, result)
    }

    #[test]
    fn length_test_large_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
            Some(vec![None]),
        ];
        let result: Vec<i64> = vec![0, 3, 1];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    type OptionStr = Option<&'static str>;

    /// Shared `(input, expected element count, expected byte lengths)` cases containing a null.
    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(3), None, Some(5), Some(4)],
        )]
    }

    #[test]
    fn length_null_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_large_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // Widen the shared i32 expectations to the i64 output of the large variant.
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_null_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1), None, Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
        length_list_helper!(i32, Int32Array, Int8Type, value, result)
    }

    #[test]
    fn length_null_large_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1.1), None, Some(4.0)]),
            Some(vec![Some(0.1)]),
        ];
        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    #[test]
    fn length_test_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = vec![3, 3, 2].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_large_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
        let expected: Int64Array = vec![3i64, 3, 2].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_null_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
        let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
        let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
        let nulls = NullBuffer::from(vec![true, false, true, true]);
        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_null_large_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Float32, true));
        let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
        let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
        let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
        let nulls = NullBuffer::from(vec![true, false, true, true]);
        let list_array =
            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
        let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
        assert_eq!(&expected, result);
    }

    /// `length` must reject a data type it does not support (here `UInt64`).
    #[test]
    fn length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(length(&array).is_err());
    }

    /// `length` on a sliced string array must respect the slice offset.
    #[test]
    fn length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
        assert_eq!(&expected, result);
    }

    /// `length` on a sliced binary array must respect the slice offset.
    #[test]
    fn length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
        assert_eq!(&expected, result);
    }

    /// Shared `(input, expected element count, expected bit lengths)` cases for the string
    /// variants of `bit_length`.
    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // A long input: 4096 elements cycling through four short strings.
        let values = ["one", "on", "o", ""];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
            (vec!["💖"], 1, vec![32]),
            (vec!["josé"], 1, vec![40]),
            (values, 4096, expected),
        ]
    }

    #[test]
    fn bit_length_test_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_large_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_utf8view() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let string_array = StringViewArray::from(input);
                let result = bit_length(&string_array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_null_utf8view() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                // Must be a view array: a plain `StringArray` here would only repeat
                // `bit_length_null_string` and leave the Utf8View null path untested.
                let array = StringViewArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_binary() {
        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
        let expected: Vec<i32> = vec![24, 16, 40];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
        let expected: Vec<i64> = vec![32, 8, 16];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![32, 16, 24, 392];

        let array = BinaryViewArray::from(value);
        let result = bit_length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn bit_length_null_binary_view() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];

        let array = BinaryViewArray::from(value);
        let result = bit_length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    /// Shared `(input, expected element count, expected bit lengths)` cases containing a null.
    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(24), None, Some(40), Some(32)],
        )]
    }

    #[test]
    fn bit_length_null_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_large_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // Widen the shared i32 expectations to the i64 output of the large variant.
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    /// `bit_length` must reject a data type it does not support (here `UInt64`).
    #[test]
    fn bit_length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(bit_length(&array).is_err());
    }

    /// `bit_length` on a sliced string array must respect the slice offset.
    #[test]
    fn bit_length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
        assert_eq!(&expected, result);
    }

    /// `bit_length` on a sliced binary array must respect the slice offset.
    #[test]
    fn bit_length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_dictionary() {
        _length_dictionary::<Int8Type>();
        _length_dictionary::<Int16Type>();
        _length_dictionary::<Int32Type>();
        _length_dictionary::<Int64Type>();
        _length_dictionary::<UInt8Type>();
        _length_dictionary::<UInt16Type>();
        _length_dictionary::<UInt32Type>();
        _length_dictionary::<UInt64Type>();
    }

    /// Checks `length` on a string dictionary with key type `K`, including null entries.
    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        // Every fifth entry (index 3 of the cycle) is null.
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 { None } else { Some(v[i as usize]) }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        let expected: Vec<Option<i32>> =
            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();

        let res = length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        // Resolve the keys against the length values to get one entry per input element.
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        assert_eq!(expected, actual);
    }

    #[test]
    fn bit_length_dictionary() {
        _bit_length_dictionary::<Int8Type>();
        _bit_length_dictionary::<Int16Type>();
        _bit_length_dictionary::<Int32Type>();
        _bit_length_dictionary::<Int64Type>();
        _bit_length_dictionary::<UInt8Type>();
        _bit_length_dictionary::<UInt16Type>();
        _bit_length_dictionary::<UInt32Type>();
        _bit_length_dictionary::<UInt64Type>();
    }

    /// Checks `bit_length` on a string dictionary with key type `K`, including null entries.
    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        // Every fifth entry (index 3 of the cycle) is null.
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 { None } else { Some(v[i as usize]) }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        // Bit length is defined on bytes, so use the byte length rather than the char count.
        let expected: Vec<Option<i32>> = data
            .iter()
            .map(|opt| opt.map(|s| (s.len() * 8) as i32))
            .collect();

        let res = bit_length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        // Resolve the keys against the bit-length values to get one entry per input element.
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_fixed_size_list_length() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(9)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
            .build()
            .unwrap();
        let list_data_type =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3);
        let nulls = NullBuffer::from(vec![true, false, true]);
        let list_data = ArrayData::builder(list_data_type)
            .len(3)
            .add_child_data(value_data)
            .nulls(Some(nulls))
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        let lengths = length(&list_array).unwrap();
        let lengths = lengths.as_primitive::<Int32Type>();

        assert_eq!(lengths.len(), 3);
        assert_eq!(lengths.value(0), 3);
        assert!(lengths.is_null(1));
        assert_eq!(lengths.value(2), 3);
    }

    #[test]
    fn test_fixed_size_binary() {
        let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);
        let result = length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![4; 4]));

        let result = bit_length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![32; 4]));
    }

    #[test]
    fn length_test_ree_string_values() {
        use arrow_array::RunArray;
        use arrow_array::types::Int32Type;

        let string_values = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
        let run_ends = PrimitiveArray::<Int32Type>::from(vec![2i32, 5, 9, 11, 14]);
        let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &string_values).unwrap();

        let result = length(&ree_array).unwrap();
        let result = result
            .as_any()
            .downcast_ref::<RunArray<Int32Type>>()
            .unwrap();

        let result_values = result
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap();

        let expected: Int32Array = vec![5, 3, 4, 5, 1].into();
        assert_eq!(&expected, result_values);
    }

    /// A run-end encoded array whose values type is unsupported must fail in `length`.
    #[test]
    fn length_test_ree_invalid_type_early_fail() {
        use arrow_array::RunArray;
        use arrow_array::types::Int32Type;

        let uint64_values = UInt64Array::from(vec![1u64, 2, 3]);
        let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
        let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &uint64_values).unwrap();

        assert!(length(&ree_array).is_err());
    }
}