1use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
25
26fn length_impl<P: ArrowPrimitiveType>(
27 offsets: &OffsetBuffer<P::Native>,
28 nulls: Option<&NullBuffer>,
29) -> ArrayRef {
30 let v: Vec<_> = offsets
31 .windows(2)
32 .map(|w| w[1].sub_wrapping(w[0]))
33 .collect();
34 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
35}
36
37fn bit_length_impl<P: ArrowPrimitiveType>(
38 offsets: &OffsetBuffer<P::Native>,
39 nulls: Option<&NullBuffer>,
40) -> ArrayRef {
41 let bits = P::Native::usize_as(8);
42 let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
43 let v: Vec<_> = offsets.windows(2).map(c).collect();
44 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
45}
46
47pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
57 if let Some(d) = array.as_any_dictionary_opt() {
58 let lengths = length(d.values().as_ref())?;
59 return Ok(d.with_values(lengths));
60 }
61 if let Some(ree) = array.as_any_ree_opt() {
62 let lengths = length(ree.values())?;
63 return Ok(ree.with_values(lengths));
64 }
65 match array.data_type() {
66 DataType::List(_) => {
67 let list = array.as_list::<i32>();
68 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
69 }
70 DataType::LargeList(_) => {
71 let list = array.as_list::<i64>();
72 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
73 }
74 DataType::ListView(_) => {
75 let list = array.as_list_view::<i32>();
76 Ok(Arc::new(Int32Array::new(
77 list.sizes().clone(),
78 list.nulls().cloned(),
79 )))
80 }
81 DataType::LargeListView(_) => {
82 let list = array.as_list_view::<i64>();
83 Ok(Arc::new(Int64Array::new(
84 list.sizes().clone(),
85 list.nulls().cloned(),
86 )))
87 }
88 DataType::Utf8 => {
89 let list = array.as_string::<i32>();
90 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
91 }
92 DataType::LargeUtf8 => {
93 let list = array.as_string::<i64>();
94 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
95 }
96 DataType::Utf8View => {
97 let list = array.as_string_view();
98 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
99 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
100 v.into(),
101 list.nulls().cloned(),
102 )?))
103 }
104 DataType::Binary => {
105 let list = array.as_binary::<i32>();
106 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
107 }
108 DataType::LargeBinary => {
109 let list = array.as_binary::<i64>();
110 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
111 }
112 DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
113 Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
114 )),
115 DataType::BinaryView => {
116 let list = array.as_binary_view();
117 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
118 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
119 v.into(),
120 list.nulls().cloned(),
121 )?))
122 }
123 other => Err(ArrowError::ComputeError(format!(
124 "length not supported for {other:?}"
125 ))),
126 }
127}
128
129pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
137 if let Some(d) = array.as_any_dictionary_opt() {
138 let lengths = bit_length(d.values().as_ref())?;
139 return Ok(d.with_values(lengths));
140 }
141 if let Some(ree) = array.as_any_ree_opt() {
142 let lengths = bit_length(ree.values())?;
143 return Ok(ree.with_values(lengths));
144 }
145
146 match array.data_type() {
147 DataType::Utf8 => {
148 let list = array.as_string::<i32>();
149 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
150 }
151 DataType::LargeUtf8 => {
152 let list = array.as_string::<i64>();
153 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
154 }
155 DataType::Utf8View => {
156 let list = array.as_string_view();
157 let values = list
158 .views()
159 .iter()
160 .map(|view| (*view as i32).wrapping_mul(8))
161 .collect();
162 Ok(Arc::new(Int32Array::try_new(
163 values,
164 array.nulls().cloned(),
165 )?))
166 }
167 DataType::Binary => {
168 let list = array.as_binary::<i32>();
169 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
170 }
171 DataType::LargeBinary => {
172 let list = array.as_binary::<i64>();
173 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
174 }
175 DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
176 vec![*len * 8; array.len()].into(),
177 array.nulls().cloned(),
178 )?)),
179 DataType::BinaryView => {
180 let list = array.as_binary_view();
181 let values = list
182 .views()
183 .iter()
184 .map(|view| (*view as i32).wrapping_mul(8))
185 .collect();
186 Ok(Arc::new(Int32Array::try_new(
187 values,
188 array.nulls().cloned(),
189 )?))
190 }
191 other => Err(ArrowError::ComputeError(format!(
192 "bit_length not supported for {other:?}"
193 ))),
194 }
195}
196
197#[cfg(test)]
198mod tests {
199 use super::*;
200 use arrow_buffer::{Buffer, ScalarBuffer};
201 use arrow_data::ArrayData;
202 use arrow_schema::Field;
203
/// Shared `(input, expected result length, expected byte lengths)` cases for the string
/// `length` tests, including one 4096-element input built by cycling five strings.
fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
    let pattern = [
        "one",
        "on",
        "o",
        "",
        "this is a longer string to test string array with",
    ];
    let pattern_lengths = [3, 2, 1, 0, 49];

    let large_input: Vec<&'static str> = pattern.iter().copied().cycle().take(4096).collect();
    let large_expected: Vec<i32> = pattern_lengths.iter().copied().cycle().take(4096).collect();

    let mut cases = vec![
        (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
        (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
        // A single multi-byte character: length is counted in bytes.
        (vec!["💖"], 1, vec![4]),
    ];
    cases.push((large_input, 4096, large_expected));
    cases
}
223
/// Builds a `GenericBinaryArray<$offset_ty>` from `$value`, runs `$kernel` on it and asserts
/// that the output downcasts to `$result_ty` and equals `$expected`.
macro_rules! length_binary_helper {
    ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
        let input = GenericBinaryArray::<$offset_ty>::from($value);
        let output = $kernel(&input).unwrap();
        let actual = output.as_any().downcast_ref::<$result_ty>().unwrap();
        let wanted = <$result_ty>::from($expected);
        assert_eq!(&wanted, actual);
    }};
}
233
/// Builds a `GenericListArray<$offset_ty>` of `$element_ty` primitives from `$value`, runs
/// `length` on it and asserts that the output downcasts to `$result_ty` and equals
/// `$expected`.
macro_rules! length_list_helper {
    ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
        let input =
            GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
        let output = length(&input).unwrap();
        let actual = output.as_any().downcast_ref::<$result_ty>().unwrap();
        let wanted = <$result_ty>::from($expected);
        assert_eq!(&wanted, actual);
    }};
}
244
/// `length` on `StringArray` yields an `Int32Array` of per-value byte lengths.
#[test]
fn length_test_string() {
    for (input, len, expected) in length_cases_string() {
        let array = StringArray::from(input);
        let result = length(&array).unwrap();
        assert_eq!(len, result.len());

        let lengths = result.as_any().downcast_ref::<Int32Array>().unwrap();
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(*want, lengths.value(i));
        }
    }
}
259
/// `length` on `LargeStringArray` yields an `Int64Array` of per-value byte lengths.
#[test]
fn length_test_large_string() {
    for (input, len, expected) in length_cases_string() {
        let array = LargeStringArray::from(input);
        let result = length(&array).unwrap();
        assert_eq!(len, result.len());

        let lengths = result.as_any().downcast_ref::<Int64Array>().unwrap();
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(i64::from(*want), lengths.value(i));
        }
    }
}
274
/// `length` on `StringViewArray` yields an `Int32Array` of per-value byte lengths.
#[test]
fn length_test_string_view() {
    for (input, len, expected) in length_cases_string() {
        let array = StringViewArray::from(input);
        let result = length(&array).unwrap();
        assert_eq!(len, result.len());

        let lengths = result.as_any().downcast_ref::<Int32Array>().unwrap();
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(*want, lengths.value(i));
        }
    }
}
289
/// `length` on a 32-bit-offset binary array returns byte counts as `Int32`.
#[test]
fn length_test_binary() {
    let input: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
    let expected: Vec<i32> = vec![4, 3, 2];
    length_binary_helper!(i32, Int32Array, length, input, expected);
}
296
/// `length` on a 64-bit-offset binary array returns byte counts as `Int64`.
#[test]
fn length_test_large_binary() {
    let input: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
    let expected: Vec<i64> = vec![4, 2, 3];
    length_binary_helper!(i64, Int64Array, length, input, expected);
}
303
/// `length` on `BinaryViewArray` returns byte counts as `Int32`, for short and long values.
#[test]
fn length_test_binary_view() {
    let input: Vec<&[u8]> = vec![
        b"zero",
        &[0xff, 0xf8],
        b"two",
        b"this is a longer string to test binary array with",
    ];
    let array = BinaryViewArray::from(input);

    let result = length(&array).unwrap();
    let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected = Int32Array::from(vec![4, 2, 3, 49]);
    assert_eq!(&expected, actual);
}
320
/// `length` on a `ListArray` counts the elements of each list, including empty lists.
#[test]
fn length_test_list() {
    let lists = vec![
        Some(vec![]),
        Some(vec![Some(1), Some(2), Some(4)]),
        Some(vec![Some(0)]),
    ];
    let expected: Vec<i32> = vec![0, 3, 1];
    length_list_helper!(i32, Int32Array, Int32Type, lists, expected);
}
331
/// `length` on a `LargeListArray` counts elements (null elements included) as `Int64`.
#[test]
fn length_test_large_list() {
    let lists = vec![
        Some(vec![]),
        Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
        Some(vec![None]),
    ];
    let expected: Vec<i64> = vec![0, 3, 1];
    length_list_helper!(i64, Int64Array, Float32Type, lists, expected);
}
342
/// A nullable static string slot used by the null-handling test fixtures.
type OptionStr = Option<&'static str>;

/// Shared `(input, expected result length, expected byte lengths)` cases for the nullable
/// string `length` tests.
fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
    let input = vec![Some("one"), None, Some("three"), Some("four")];
    let expected = vec![Some(3), None, Some(5), Some(4)];
    vec![(input, 4, expected)]
}
352
/// Null slots of a `StringArray` stay null in the `length` result.
#[test]
fn length_null_string() {
    for (input, len, expected) in length_null_cases_string() {
        let array = StringArray::from(input);
        let result = length(&array).unwrap();
        assert_eq!(len, result.len());

        let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected = Int32Array::from(expected);
        assert_eq!(&expected, actual);
    }
}
367
/// Null slots of a `LargeStringArray` stay null in the `Int64` `length` result.
#[test]
fn length_null_large_string() {
    for (input, len, expected) in length_null_cases_string() {
        let array = LargeStringArray::from(input);
        let result = length(&array).unwrap();
        assert_eq!(len, result.len());

        let actual = result.as_any().downcast_ref::<Int64Array>().unwrap();

        // The shared fixture is expressed in i32; widen it to match the i64 output.
        let widened: Vec<Option<i64>> = expected
            .iter()
            .copied()
            .map(|slot| slot.map(i64::from))
            .collect();
        let expected = Int64Array::from(widened);
        assert_eq!(&expected, actual);
    }
}
387
/// Null slots of a `BinaryArray` stay null in the `length` result.
#[test]
fn length_null_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
    let expected: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
    length_binary_helper!(i32, Int32Array, length, input, expected);
}
395
/// Null slots of a `LargeBinaryArray` stay null in the `Int64` `length` result.
#[test]
fn length_null_large_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
    let expected: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
    length_binary_helper!(i64, Int64Array, length, input, expected);
}
403
/// A null list yields a null length; null elements inside a list still count.
#[test]
fn length_null_list() {
    let lists = vec![
        Some(vec![]),
        None,
        Some(vec![Some(1), None, Some(2), Some(4)]),
        Some(vec![Some(0)]),
    ];
    let expected: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
    length_list_helper!(i32, Int32Array, Int8Type, lists, expected);
}
415
/// Same as `length_null_list`, for 64-bit offsets and an `Int64` result.
#[test]
fn length_null_large_list() {
    let lists = vec![
        Some(vec![]),
        None,
        Some(vec![Some(1.1), None, Some(4.0)]),
        Some(vec![Some(0.1)]),
    ];
    let expected: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
    length_list_helper!(i64, Int64Array, Float32Type, lists, expected);
}
427
/// `length` on a `ListViewArray` returns its sizes as `Int32`.
#[test]
fn length_test_list_view() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let child: ArrayRef = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]));
    let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
    let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
    let list_array = ListViewArray::new(item_field, offsets, sizes, child, None);

    let result = length(&list_array).unwrap();
    let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected = Int32Array::from(vec![3, 3, 2]);
    assert_eq!(&expected, actual);
}
442
/// `length` on a `LargeListViewArray` returns its sizes as `Int64`.
#[test]
fn length_test_large_list_view() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let child: ArrayRef = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]));
    let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
    let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
    let list_array = LargeListViewArray::new(item_field, offsets, sizes, child, None);

    let result = length(&list_array).unwrap();
    let actual = result.as_any().downcast_ref::<Int64Array>().unwrap();

    let expected = Int64Array::from(vec![3i64, 3, 2]);
    assert_eq!(&expected, actual);
}
457
/// A null slot of a `ListViewArray` yields a null length.
#[test]
fn length_null_list_view() {
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let child: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 0]));
    let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
    let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
    // The second slot is the null one.
    let validity = NullBuffer::from(vec![true, false, true, true]);
    let list_array = ListViewArray::new(item_field, offsets, sizes, child, Some(validity));

    let result = length(&list_array).unwrap();
    let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected = Int32Array::from(vec![Some(0), None, Some(4), Some(1)]);
    assert_eq!(&expected, actual);
}
473
/// A null slot of a `LargeListViewArray` yields a null length in the `Int64` result.
#[test]
fn length_null_large_list_view() {
    let item_field = Arc::new(Field::new_list_field(DataType::Float32, true));
    let child: ArrayRef = Arc::new(Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]));
    let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
    let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
    // The second slot is the null one.
    let validity = NullBuffer::from(vec![true, false, true, true]);
    let list_array = LargeListViewArray::new(item_field, offsets, sizes, child, Some(validity));

    let result = length(&list_array).unwrap();
    let actual = result.as_any().downcast_ref::<Int64Array>().unwrap();

    let expected = Int64Array::from(vec![Some(0i64), None, Some(3), Some(1)]);
    assert_eq!(&expected, actual);
}
490
/// `length` rejects a primitive array (here `UInt64`) with an error.
#[test]
fn length_wrong_type() {
    let unsupported = UInt64Array::from(vec![1u64]);
    let outcome = length(&unsupported);
    assert!(outcome.is_err());
}
498
/// `length` respects the offset and length of a sliced `StringArray`.
#[test]
fn length_offsets_string() {
    let full = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
    // Drop the first element so the slice starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let result = length(&sliced).unwrap();
    let actual = result.as_primitive::<Int32Type>();

    let expected = Int32Array::from(vec![Some(1), Some(5), None]);
    assert_eq!(&expected, actual);
}
510
/// `length` respects the offset and length of a sliced `BinaryArray`.
#[test]
fn length_offsets_binary() {
    let input: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
    let full = BinaryArray::from(input);
    // Drop the first element so the slice starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let result = length(&sliced).unwrap();
    let actual = result.as_primitive::<Int32Type>();

    let expected = Int32Array::from(vec![Some(1), Some(2), None]);
    assert_eq!(&expected, actual);
}
522
/// Shared `(input, expected result length, expected bit lengths)` cases for the string
/// `bit_length` tests, including one 4096-element input built by cycling four strings.
fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
    let pattern = ["one", "on", "o", ""];
    let pattern_bits = [24, 16, 8, 0];

    let large_input: Vec<&'static str> = pattern.iter().copied().cycle().take(4096).collect();
    let large_expected: Vec<i32> = pattern_bits.iter().copied().cycle().take(4096).collect();

    let mut cases = vec![
        (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
        // Multi-byte characters: bit length is based on bytes, not characters.
        (vec!["💖"], 1, vec![32]),
        (vec!["josé"], 1, vec![40]),
    ];
    cases.push((large_input, 4096, large_expected));
    cases
}
536
/// `bit_length` on `StringArray` yields an `Int32Array` of per-value bit lengths.
#[test]
fn bit_length_test_string() {
    for (input, len, expected) in bit_length_cases() {
        let array = StringArray::from(input);
        let result = bit_length(&array).unwrap();
        assert_eq!(len, result.len());

        let bits = result.as_any().downcast_ref::<Int32Array>().unwrap();
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(*want, bits.value(i));
        }
    }
}
551
/// `bit_length` on `LargeStringArray` yields an `Int64Array` of per-value bit lengths.
#[test]
fn bit_length_test_large_string() {
    for (input, len, expected) in bit_length_cases() {
        let array = LargeStringArray::from(input);
        let result = bit_length(&array).unwrap();
        assert_eq!(len, result.len());

        let bits = result.as_any().downcast_ref::<Int64Array>().unwrap();
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(i64::from(*want), bits.value(i));
        }
    }
}
566
/// `bit_length` on `StringViewArray` yields an `Int32Array` of per-value bit lengths.
#[test]
fn bit_length_test_utf8view() {
    for (input, len, expected) in bit_length_cases() {
        let array = StringViewArray::from(input);
        let result = bit_length(&array).unwrap();
        assert_eq!(len, result.len());

        let bits = result.as_any().downcast_ref::<Int32Array>().unwrap();
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(*want, bits.value(i));
        }
    }
}
581
/// Null slots of a `StringViewArray` stay null in the `bit_length` result.
#[test]
fn bit_length_null_utf8view() {
    for (input, len, expected) in bit_length_null_cases() {
        // Build a view array so this exercises the `Utf8View` branch; a plain `StringArray`
        // here would only repeat `bit_length_null_string`.
        let array = StringViewArray::from(input);
        let result = bit_length(&array).unwrap();
        assert_eq!(len, result.len());

        let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected = Int32Array::from(expected);
        assert_eq!(&expected, actual);
    }
}
/// `bit_length` on a 32-bit-offset binary array returns bit counts as `Int32`.
#[test]
fn bit_length_binary() {
    let input: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
    let expected_bits: Vec<i32> = vec![24, 16, 40];
    length_binary_helper!(i32, Int32Array, bit_length, input, expected_bits);
}
602
/// `bit_length` on a 64-bit-offset binary array returns bit counts as `Int64`.
#[test]
fn bit_length_large_binary() {
    let input: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
    let expected_bits: Vec<i64> = vec![32, 8, 16];
    length_binary_helper!(i64, Int64Array, bit_length, input, expected_bits);
}
609
/// `bit_length` on `BinaryViewArray` returns bit counts as `Int32`, for short and long values.
#[test]
fn bit_length_binary_view() {
    let input: Vec<&[u8]> = vec![
        b"zero",
        &[0xff, 0xf8],
        b"two",
        b"this is a longer string to test binary array with",
    ];
    let array = BinaryViewArray::from(input);

    let result = bit_length(&array).unwrap();
    let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected = Int32Array::from(vec![32, 16, 24, 392]);
    assert_eq!(&expected, actual);
}
626
/// Null slots of a `BinaryViewArray` stay null in the `bit_length` result.
#[test]
fn bit_length_null_binary_view() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
    let array = BinaryViewArray::from(input);

    let result = bit_length(&array).unwrap();
    let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected = Int32Array::from(vec![Some(24), None, Some(40), Some(16)]);
    assert_eq!(&expected, actual);
}
639
640 fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
641 vec![(
642 vec![Some("one"), None, Some("three"), Some("four")],
643 4,
644 vec![Some(24), None, Some(40), Some(32)],
645 )]
646 }
647
/// Null slots of a `StringArray` stay null in the `bit_length` result.
#[test]
fn bit_length_null_string() {
    for (input, len, expected) in bit_length_null_cases() {
        let array = StringArray::from(input);
        let result = bit_length(&array).unwrap();
        assert_eq!(len, result.len());

        let actual = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected = Int32Array::from(expected);
        assert_eq!(&expected, actual);
    }
}
662
/// Null slots of a `LargeStringArray` stay null in the `Int64` `bit_length` result.
#[test]
fn bit_length_null_large_string() {
    for (input, len, expected) in bit_length_null_cases() {
        let array = LargeStringArray::from(input);
        let result = bit_length(&array).unwrap();
        assert_eq!(len, result.len());

        let actual = result.as_any().downcast_ref::<Int64Array>().unwrap();

        // The shared fixture is expressed in i32; widen it to match the i64 output.
        let widened: Vec<Option<i64>> = expected
            .iter()
            .copied()
            .map(|slot| slot.map(i64::from))
            .collect();
        let expected = Int64Array::from(widened);
        assert_eq!(&expected, actual);
    }
}
682
/// Null slots of a `BinaryArray` stay null in the `bit_length` result.
#[test]
fn bit_length_null_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
    let expected_bits: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
    length_binary_helper!(i32, Int32Array, bit_length, input, expected_bits);
}
690
/// Null slots of a `LargeBinaryArray` stay null in the `Int64` `bit_length` result.
#[test]
fn bit_length_null_large_binary() {
    let input: Vec<Option<&[u8]>> =
        vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
    let expected_bits: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
    length_binary_helper!(i64, Int64Array, bit_length, input, expected_bits);
}
698
/// `bit_length` rejects a primitive array (here `UInt64`) with an error.
#[test]
fn bit_length_wrong_type() {
    let unsupported = UInt64Array::from(vec![1u64]);
    let outcome = bit_length(&unsupported);
    assert!(outcome.is_err());
}
706
/// `bit_length` respects the offset and length of a sliced `StringArray`.
#[test]
fn bit_length_offsets_string() {
    let full = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
    // Drop the first element so the slice starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let result = bit_length(&sliced).unwrap();
    let actual = result.as_primitive::<Int32Type>();

    let expected = Int32Array::from(vec![Some(8), Some(40), None]);
    assert_eq!(&expected, actual);
}
718
/// `bit_length` respects the offset and length of a sliced `BinaryArray`.
#[test]
fn bit_length_offsets_binary() {
    let input: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
    let full = BinaryArray::from(input);
    // Drop the first element so the slice starts at a non-zero offset.
    let sliced = full.slice(1, 3);

    let result = bit_length(&sliced).unwrap();
    let actual = result.as_primitive::<Int32Type>();

    let expected = Int32Array::from(vec![Some(0), Some(40), None]);
    assert_eq!(&expected, actual);
}
730
/// Runs the dictionary `length` check once for every integer key type.
#[test]
fn length_dictionary() {
    // Signed keys.
    _length_dictionary::<Int8Type>();
    _length_dictionary::<Int16Type>();
    _length_dictionary::<Int32Type>();
    _length_dictionary::<Int64Type>();

    // Unsigned keys.
    _length_dictionary::<UInt8Type>();
    _length_dictionary::<UInt16Type>();
    _length_dictionary::<UInt32Type>();
    _length_dictionary::<UInt64Type>();
}
742
743 fn _length_dictionary<K: ArrowDictionaryKeyType>() {
744 const TOTAL: i32 = 100;
745
746 let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
747 let data: Vec<Option<&str>> = (0..TOTAL)
748 .map(|n| {
749 let i = n % 5;
750 if i == 3 { None } else { Some(v[i as usize]) }
751 })
752 .collect();
753
754 let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
755
756 let expected: Vec<Option<i32>> =
757 data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();
758
759 let res = length(&dict_array).unwrap();
760 let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
761 let actual: Vec<Option<i32>> = actual
762 .values()
763 .as_any()
764 .downcast_ref::<Int32Array>()
765 .unwrap()
766 .take_iter(dict_array.keys_iter())
767 .collect();
768
769 for i in 0..TOTAL as usize {
770 assert_eq!(expected[i], actual[i],);
771 }
772 }
773
/// Runs the dictionary `bit_length` check once for every integer key type.
#[test]
fn bit_length_dictionary() {
    // Signed keys.
    _bit_length_dictionary::<Int8Type>();
    _bit_length_dictionary::<Int16Type>();
    _bit_length_dictionary::<Int32Type>();
    _bit_length_dictionary::<Int64Type>();

    // Unsigned keys.
    _bit_length_dictionary::<UInt8Type>();
    _bit_length_dictionary::<UInt16Type>();
    _bit_length_dictionary::<UInt32Type>();
    _bit_length_dictionary::<UInt64Type>();
}
785
786 fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
787 const TOTAL: i32 = 100;
788
789 let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
790 let data: Vec<Option<&str>> = (0..TOTAL)
791 .map(|n| {
792 let i = n % 5;
793 if i == 3 { None } else { Some(v[i as usize]) }
794 })
795 .collect();
796
797 let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
798
799 let expected: Vec<Option<i32>> = data
800 .iter()
801 .map(|opt| opt.map(|s| (s.chars().count() * 8) as i32))
802 .collect();
803
804 let res = bit_length(&dict_array).unwrap();
805 let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
806 let actual: Vec<Option<i32>> = actual
807 .values()
808 .as_any()
809 .downcast_ref::<Int32Array>()
810 .unwrap()
811 .take_iter(dict_array.keys_iter())
812 .collect();
813
814 for i in 0..TOTAL as usize {
815 assert_eq!(expected[i], actual[i],);
816 }
817 }
818
/// `length` on a `FixedSizeListArray` reports the fixed size for valid slots and null for
/// null slots.
#[test]
fn test_fixed_size_list_length() {
    // Nine child values grouped into three lists of three; the middle list is null.
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    let list_type = DataType::FixedSizeList(item_field, 3);
    let validity = NullBuffer::from(vec![true, false, true]);

    let child_data = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
        .build()
        .unwrap();
    let list_data = ArrayData::builder(list_type)
        .len(3)
        .add_child_data(child_data)
        .nulls(Some(validity))
        .build()
        .unwrap();
    let list_array = FixedSizeListArray::from(list_data);

    let result = length(&list_array).unwrap();
    let lengths: &Int32Array = result.as_primitive();

    assert_eq!(lengths.len(), 3);
    assert_eq!(lengths.value(0), 3);
    assert!(lengths.is_null(1));
    assert_eq!(lengths.value(2), 3);
}
846
/// For a `FixedSizeBinaryArray` of width 4, `length` is 4 and `bit_length` is 32 per slot.
#[test]
fn test_fixed_size_binary() {
    // Sixteen zero bytes split into four 4-byte values.
    let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);

    let byte_lengths = length(&array).unwrap();
    let expected_bytes = Int32Array::from(vec![4; 4]);
    assert_eq!(byte_lengths.as_ref(), &expected_bytes);

    let bit_lengths = bit_length(&array).unwrap();
    let expected_bits = Int32Array::from(vec![32; 4]);
    assert_eq!(bit_lengths.as_ref(), &expected_bits);
}
/// `length` on a run-end encoded string array returns a `RunArray` whose values are the
/// `Int32` lengths of the original run values.
#[test]
fn length_test_ree_string_values() {
    use arrow_array::RunArray;
    use arrow_array::types::Int32Type;

    let values = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![2i32, 5, 9, 11, 14]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let result = length(&ree_array).unwrap();
    let result_runs = result
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();
    let value_lengths = result_runs
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    let expected = Int32Array::from(vec![5, 3, 4, 5, 1]);
    assert_eq!(&expected, value_lengths);
}
/// `length` on a run-end encoded array with unsupported (`UInt64`) values is an error.
#[test]
fn length_test_ree_invalid_type_early_fail() {
    use arrow_array::RunArray;
    use arrow_array::types::Int32Type;

    let values = UInt64Array::from(vec![1u64, 2, 3]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let outcome = length(&ree_array);
    assert!(outcome.is_err());
}
891
/// `bit_length` on a run-end encoded string array returns a `RunArray` whose values are the
/// `Int32` bit lengths of the original run values.
#[test]
fn bit_length_test_ree_utf8() {
    use arrow_array::RunArray;
    use arrow_array::types::Int32Type;

    let values = StringArray::from(vec!["hello", "world", "test"]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let result = bit_length(&ree_array).unwrap();
    let result_runs = result
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();
    let value_bits = result_runs
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    let expected = Int32Array::from(vec![40, 40, 32]);
    assert_eq!(&expected, value_bits);
}
914}