1use arrow_array::ree_map;
21use arrow_array::*;
22use arrow_array::{cast::AsArray, types::*};
23use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
24use arrow_schema::{ArrowError, DataType};
25use std::sync::Arc;
26
27fn length_impl<P: ArrowPrimitiveType>(
28 offsets: &OffsetBuffer<P::Native>,
29 nulls: Option<&NullBuffer>,
30) -> ArrayRef {
31 let v: Vec<_> = offsets
32 .windows(2)
33 .map(|w| w[1].sub_wrapping(w[0]))
34 .collect();
35 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
36}
37
38fn bit_length_impl<P: ArrowPrimitiveType>(
39 offsets: &OffsetBuffer<P::Native>,
40 nulls: Option<&NullBuffer>,
41) -> ArrayRef {
42 let bits = P::Native::usize_as(8);
43 let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
44 let v: Vec<_> = offsets.windows(2).map(c).collect();
45 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
46}
47
48pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
58 if let Some(d) = array.as_any_dictionary_opt() {
59 let lengths = length(d.values().as_ref())?;
60 return Ok(d.with_values(lengths));
61 }
62 match array.data_type() {
63 DataType::List(_) => {
64 let list = array.as_list::<i32>();
65 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
66 }
67 DataType::LargeList(_) => {
68 let list = array.as_list::<i64>();
69 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
70 }
71 DataType::ListView(_) => {
72 let list = array.as_list_view::<i32>();
73 Ok(Arc::new(Int32Array::new(
74 list.sizes().clone(),
75 list.nulls().cloned(),
76 )))
77 }
78 DataType::LargeListView(_) => {
79 let list = array.as_list_view::<i64>();
80 Ok(Arc::new(Int64Array::new(
81 list.sizes().clone(),
82 list.nulls().cloned(),
83 )))
84 }
85 DataType::Utf8 => {
86 let list = array.as_string::<i32>();
87 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
88 }
89 DataType::LargeUtf8 => {
90 let list = array.as_string::<i64>();
91 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
92 }
93 DataType::Utf8View => {
94 let list = array.as_string_view();
95 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
96 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
97 v.into(),
98 list.nulls().cloned(),
99 )?))
100 }
101 DataType::Binary => {
102 let list = array.as_binary::<i32>();
103 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
104 }
105 DataType::LargeBinary => {
106 let list = array.as_binary::<i64>();
107 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
108 }
109 DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
110 Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
111 )),
112 DataType::BinaryView => {
113 let list = array.as_binary_view();
114 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
115 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
116 v.into(),
117 list.nulls().cloned(),
118 )?))
119 }
120 DataType::RunEndEncoded(k, _) => match k.data_type() {
121 DataType::Int16 => ree_map!(array, Int16Type, length),
122 DataType::Int32 => ree_map!(array, Int32Type, length),
123 DataType::Int64 => ree_map!(array, Int64Type, length),
124 _ => Err(ArrowError::InvalidArgumentError(format!(
125 "Invalid run-end type: {:?}",
126 k.data_type()
127 ))),
128 },
129 other => Err(ArrowError::ComputeError(format!(
130 "length not supported for {other:?}"
131 ))),
132 }
133}
134
135pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
143 if let Some(d) = array.as_any_dictionary_opt() {
144 let lengths = bit_length(d.values().as_ref())?;
145 return Ok(d.with_values(lengths));
146 }
147
148 match array.data_type() {
149 DataType::Utf8 => {
150 let list = array.as_string::<i32>();
151 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
152 }
153 DataType::LargeUtf8 => {
154 let list = array.as_string::<i64>();
155 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
156 }
157 DataType::Utf8View => {
158 let list = array.as_string_view();
159 let values = list
160 .views()
161 .iter()
162 .map(|view| (*view as i32).wrapping_mul(8))
163 .collect();
164 Ok(Arc::new(Int32Array::try_new(
165 values,
166 array.nulls().cloned(),
167 )?))
168 }
169 DataType::Binary => {
170 let list = array.as_binary::<i32>();
171 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
172 }
173 DataType::LargeBinary => {
174 let list = array.as_binary::<i64>();
175 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
176 }
177 DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
178 vec![*len * 8; array.len()].into(),
179 array.nulls().cloned(),
180 )?)),
181 DataType::BinaryView => {
182 let list = array.as_binary_view();
183 let values = list
184 .views()
185 .iter()
186 .map(|view| (*view as i32).wrapping_mul(8))
187 .collect();
188 Ok(Arc::new(Int32Array::try_new(
189 values,
190 array.nulls().cloned(),
191 )?))
192 }
193 DataType::RunEndEncoded(k, _) => match k.data_type() {
194 DataType::Int16 => ree_map!(array, Int16Type, bit_length),
195 DataType::Int32 => ree_map!(array, Int32Type, bit_length),
196 DataType::Int64 => ree_map!(array, Int64Type, bit_length),
197 _ => Err(ArrowError::InvalidArgumentError(format!(
198 "Invalid run-end type: {:?}",
199 k.data_type()
200 ))),
201 },
202 other => Err(ArrowError::ComputeError(format!(
203 "bit_length not supported for {other:?}"
204 ))),
205 }
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::{Buffer, ScalarBuffer};
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    /// String inputs paired with their row count and expected byte lengths.
    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // A large, repeating input that mixes short, empty and long strings.
        let values = [
            "one",
            "on",
            "o",
            "",
            "this is a longer string to test string array with",
        ];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
            (vec!["💖"], 1, vec![4]),
            (values, 4096, expected),
        ]
    }

    /// Runs `$kernel` over a `GenericBinaryArray<$offset_ty>` built from
    /// `$value` and asserts the result equals `$expected` as a `$result_ty`.
    macro_rules! length_binary_helper {
        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
            let array = GenericBinaryArray::<$offset_ty>::from($value);
            let result = $kernel(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    /// Runs `length` over a `GenericListArray<$offset_ty>` of `$element_ty`
    /// built from `$value` and asserts the result equals `$expected`.
    macro_rules! length_list_helper {
        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
            let array =
                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
            let result = length(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    #[test]
    fn length_test_string() {
        for (input, len, expected) in length_cases_string() {
            let array = StringArray::from(input);
            let result = length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
            for (i, value) in expected.iter().enumerate() {
                assert_eq!(*value, result.value(i));
            }
        }
    }

    #[test]
    fn length_test_large_string() {
        for (input, len, expected) in length_cases_string() {
            let array = LargeStringArray::from(input);
            let result = length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
            for (i, value) in expected.iter().enumerate() {
                assert_eq!(i64::from(*value), result.value(i));
            }
        }
    }

    #[test]
    fn length_test_string_view() {
        for (input, len, expected) in length_cases_string() {
            let array = StringViewArray::from(input);
            let result = length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
            for (i, value) in expected.iter().enumerate() {
                assert_eq!(*value, result.value(i));
            }
        }
    }

    #[test]
    fn length_test_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
        let result: Vec<i32> = vec![4, 3, 2];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_test_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
        let result: Vec<i64> = vec![4, 2, 3];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_test_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![4, 2, 3, 49];

        let array = BinaryViewArray::from(value);
        let result = length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1), Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<i32> = vec![0, 3, 1];
        length_list_helper!(i32, Int32Array, Int32Type, value, result)
    }

    #[test]
    fn length_test_large_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
            Some(vec![None]),
        ];
        let result: Vec<i64> = vec![0, 3, 1];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    type OptionStr = Option<&'static str>;

    /// Nullable string inputs paired with their row count and expected
    /// (nullable) byte lengths.
    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(3), None, Some(5), Some(4)],
        )]
    }

    #[test]
    fn length_null_string() {
        for (input, len, expected) in length_null_cases_string() {
            let array = StringArray::from(input);
            let result = length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

            let expected: Int32Array = expected.into();
            assert_eq!(&expected, result);
        }
    }

    #[test]
    fn length_null_large_string() {
        for (input, len, expected) in length_null_cases_string() {
            let array = LargeStringArray::from(input);
            let result = length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

            // The shared cases are i32; widen them for the Int64 result.
            let expected: Int64Array = expected
                .iter()
                .map(|v| v.map(i64::from))
                .collect::<Vec<_>>()
                .into();
            assert_eq!(&expected, result);
        }
    }

    #[test]
    fn length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_null_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1), None, Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
        length_list_helper!(i32, Int32Array, Int8Type, value, result)
    }

    #[test]
    fn length_null_large_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1.1), None, Some(4.0)]),
            Some(vec![Some(0.1)]),
        ];
        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    #[test]
    fn length_test_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        // Three views of sizes 3, 3 and 2 over eight child values.
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = vec![3, 3, 2].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_large_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        // Three views of sizes 3, 3 and 2 over eight child values.
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
        let expected: Int64Array = vec![3i64, 3, 2].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_null_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        // Slots: empty, null, four elements, one element.
        let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
        let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
        let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
        let nulls = NullBuffer::from(vec![true, false, true, true]);
        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_null_large_list_view() {
        let field = Arc::new(Field::new_list_field(DataType::Float32, true));
        // Slots: empty, null, three elements, one element.
        let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
        let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
        let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
        let nulls = NullBuffer::from(vec![true, false, true, true]);
        let list_array =
            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
        let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
        assert_eq!(&expected, result);
    }

    /// `length` must reject types it has no arm for (here `UInt64`).
    #[test]
    fn length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(length(&array).is_err());
    }

    /// `length` must respect the offset of a sliced string array.
    #[test]
    fn length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
        assert_eq!(&expected, result);
    }

    /// `length` must respect the offset of a sliced binary array.
    #[test]
    fn length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
        assert_eq!(&expected, result);
    }

    /// String inputs paired with their row count and expected bit lengths.
    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // A large, repeating input.
        let values = ["one", "on", "o", ""];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
            (vec!["💖"], 1, vec![32]),
            (vec!["josé"], 1, vec![40]),
            (values, 4096, expected),
        ]
    }

    #[test]
    fn bit_length_test_string() {
        for (input, len, expected) in bit_length_cases() {
            let array = StringArray::from(input);
            let result = bit_length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
            for (i, value) in expected.iter().enumerate() {
                assert_eq!(*value, result.value(i));
            }
        }
    }

    #[test]
    fn bit_length_test_large_string() {
        for (input, len, expected) in bit_length_cases() {
            let array = LargeStringArray::from(input);
            let result = bit_length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
            for (i, value) in expected.iter().enumerate() {
                assert_eq!(i64::from(*value), result.value(i));
            }
        }
    }

    #[test]
    fn bit_length_test_utf8view() {
        for (input, len, expected) in bit_length_cases() {
            let string_array = StringViewArray::from(input);
            let result = bit_length(&string_array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
            for (i, value) in expected.iter().enumerate() {
                assert_eq!(*value, result.value(i));
            }
        }
    }

    #[test]
    fn bit_length_null_utf8view() {
        for (input, len, expected) in bit_length_null_cases() {
            // Must be a view array: a plain `StringArray` here would only
            // repeat `bit_length_null_string` and skip the `Utf8View` arm.
            let array = StringViewArray::from(input);
            let result = bit_length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

            let expected: Int32Array = expected.into();
            assert_eq!(&expected, result);
        }
    }

    #[test]
    fn bit_length_binary() {
        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
        let expected: Vec<i32> = vec![24, 16, 40];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
        let expected: Vec<i64> = vec![32, 8, 16];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![32, 16, 24, 392];

        let array = BinaryViewArray::from(value);
        let result = bit_length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn bit_length_null_binary_view() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];

        let array = BinaryViewArray::from(value);
        let result = bit_length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    /// Nullable string inputs paired with their row count and expected
    /// (nullable) bit lengths.
    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(24), None, Some(40), Some(32)],
        )]
    }

    #[test]
    fn bit_length_null_string() {
        for (input, len, expected) in bit_length_null_cases() {
            let array = StringArray::from(input);
            let result = bit_length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

            let expected: Int32Array = expected.into();
            assert_eq!(&expected, result);
        }
    }

    #[test]
    fn bit_length_null_large_string() {
        for (input, len, expected) in bit_length_null_cases() {
            let array = LargeStringArray::from(input);
            let result = bit_length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

            // The shared cases are i32; widen them for the Int64 result.
            let expected: Int64Array = expected
                .iter()
                .map(|v| v.map(i64::from))
                .collect::<Vec<_>>()
                .into();
            assert_eq!(&expected, result);
        }
    }

    #[test]
    fn bit_length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    /// `bit_length` must reject types it has no arm for (here `UInt64`).
    #[test]
    fn bit_length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(bit_length(&array).is_err());
    }

    /// `bit_length` must respect the offset of a sliced string array.
    #[test]
    fn bit_length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
        assert_eq!(&expected, result);
    }

    /// `bit_length` must respect the offset of a sliced binary array.
    #[test]
    fn bit_length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_dictionary() {
        _length_dictionary::<Int8Type>();
        _length_dictionary::<Int16Type>();
        _length_dictionary::<Int32Type>();
        _length_dictionary::<Int64Type>();
        _length_dictionary::<UInt8Type>();
        _length_dictionary::<UInt16Type>();
        _length_dictionary::<UInt32Type>();
        _length_dictionary::<UInt64Type>();
    }

    /// Checks `length` on a string dictionary keyed by `K`, including nulls.
    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        // Every fifth slot (index 3 of each cycle) is null.
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 { None } else { Some(v[i as usize]) }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        let expected: Vec<Option<i32>> =
            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();

        let res = length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        // Expand the result through the original keys to get per-row lengths.
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        assert_eq!(expected, actual);
    }

    #[test]
    fn bit_length_dictionary() {
        _bit_length_dictionary::<Int8Type>();
        _bit_length_dictionary::<Int16Type>();
        _bit_length_dictionary::<Int32Type>();
        _bit_length_dictionary::<Int64Type>();
        _bit_length_dictionary::<UInt8Type>();
        _bit_length_dictionary::<UInt16Type>();
        _bit_length_dictionary::<UInt32Type>();
        _bit_length_dictionary::<UInt64Type>();
    }

    /// Checks `bit_length` on a string dictionary keyed by `K`, including
    /// nulls.
    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        // Every fifth slot (index 3 of each cycle) is null.
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 { None } else { Some(v[i as usize]) }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        // Bit length is defined on bytes, not characters, so use `len()`.
        let expected: Vec<Option<i32>> = data
            .iter()
            .map(|opt| opt.map(|s| (s.len() * 8) as i32))
            .collect();

        let res = bit_length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        // Expand the result through the original keys to get per-row values.
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_fixed_size_list_length() {
        // Nine child values grouped into three lists of width 3.
        let value_data = ArrayData::builder(DataType::Int32)
            .len(9)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
            .build()
            .unwrap();
        let list_data_type =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3);
        let nulls = NullBuffer::from(vec![true, false, true]);
        let list_data = ArrayData::builder(list_data_type)
            .len(3)
            .add_child_data(value_data)
            .nulls(Some(nulls))
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        let lengths = length(&list_array).unwrap();
        let lengths = lengths.as_primitive::<Int32Type>();

        assert_eq!(lengths.len(), 3);
        assert_eq!(lengths.value(0), 3);
        assert!(lengths.is_null(1));
        assert_eq!(lengths.value(2), 3);
    }

    #[test]
    fn test_fixed_size_binary() {
        let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);
        let result = length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![4; 4]));

        let result = bit_length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![32; 4]));
    }

    /// `length` on a run-end encoded string array returns a run array whose
    /// values are the lengths of the run values.
    #[test]
    fn length_test_ree_string_values() {
        use arrow_array::RunArray;
        use arrow_array::types::Int32Type;

        let string_values = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
        let run_ends = PrimitiveArray::<Int32Type>::from(vec![2i32, 5, 9, 11, 14]);
        let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &string_values).unwrap();

        let result = length(&ree_array).unwrap();
        let result = result
            .as_any()
            .downcast_ref::<RunArray<Int32Type>>()
            .unwrap();

        let result_values = result
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap();

        let expected: Int32Array = vec![5, 3, 4, 5, 1].into();
        assert_eq!(&expected, result_values);
    }

    /// `length` on a run-end encoded array with unsupported values must fail.
    #[test]
    fn length_test_ree_invalid_type_early_fail() {
        use arrow_array::RunArray;
        use arrow_array::types::Int32Type;

        let uint64_values = UInt64Array::from(vec![1u64, 2, 3]);
        let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
        let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &uint64_values).unwrap();

        assert!(length(&ree_array).is_err());
    }

    /// `bit_length` on a run-end encoded string array returns a run array
    /// whose values are the bit lengths of the run values.
    #[test]
    fn bit_length_test_ree_utf8() {
        use arrow_array::RunArray;
        use arrow_array::types::Int32Type;

        let strings = StringArray::from(vec!["hello", "world", "test"]);
        let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
        let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &strings).unwrap();

        let result = bit_length(&ree_array).unwrap();
        let result_values = result
            .as_any()
            .downcast_ref::<RunArray<Int32Type>>()
            .unwrap()
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap();

        let expected: Int32Array = vec![40, 40, 32].into();
        assert_eq!(&expected, result_values);
    }
}