Skip to main content

arrow_string/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Defines kernels for the length and bit length of string, binary and list arrays
19
20use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
25
26fn length_impl<P: ArrowPrimitiveType>(
27    offsets: &OffsetBuffer<P::Native>,
28    nulls: Option<&NullBuffer>,
29) -> ArrayRef {
30    let v: Vec<_> = offsets
31        .windows(2)
32        .map(|w| w[1].sub_wrapping(w[0]))
33        .collect();
34    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
35}
36
37fn bit_length_impl<P: ArrowPrimitiveType>(
38    offsets: &OffsetBuffer<P::Native>,
39    nulls: Option<&NullBuffer>,
40) -> ArrayRef {
41    let bits = P::Native::usize_as(8);
42    let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
43    let v: Vec<_> = offsets.windows(2).map(c).collect();
44    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
45}
46
47/// Returns an array of Int32/Int64 denoting the length of each value in the array.
48///
49/// For list array, length is the number of elements in each list.
50/// For string array and binary array, length is the number of bytes of each value.
51///
52/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, FixedSizeListArray,
53///   and ListViewArray/LargeListViewArray, or DictionaryArray with above Arrays as values, or
54///   RunEndEncoded arrays with above arrays as values
55/// * length of null is null.
56pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
57    if let Some(d) = array.as_any_dictionary_opt() {
58        let lengths = length(d.values().as_ref())?;
59        return Ok(d.with_values(lengths));
60    }
61    if let Some(ree) = array.as_any_ree_opt() {
62        let lengths = length(ree.values())?;
63        return Ok(ree.with_values(lengths));
64    }
65    match array.data_type() {
66        DataType::List(_) => {
67            let list = array.as_list::<i32>();
68            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
69        }
70        DataType::LargeList(_) => {
71            let list = array.as_list::<i64>();
72            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
73        }
74        DataType::ListView(_) => {
75            let list = array.as_list_view::<i32>();
76            Ok(Arc::new(Int32Array::new(
77                list.sizes().clone(),
78                list.nulls().cloned(),
79            )))
80        }
81        DataType::LargeListView(_) => {
82            let list = array.as_list_view::<i64>();
83            Ok(Arc::new(Int64Array::new(
84                list.sizes().clone(),
85                list.nulls().cloned(),
86            )))
87        }
88        DataType::Utf8 => {
89            let list = array.as_string::<i32>();
90            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
91        }
92        DataType::LargeUtf8 => {
93            let list = array.as_string::<i64>();
94            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
95        }
96        DataType::Utf8View => {
97            let list = array.as_string_view();
98            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
99            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
100                v.into(),
101                list.nulls().cloned(),
102            )?))
103        }
104        DataType::Binary => {
105            let list = array.as_binary::<i32>();
106            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
107        }
108        DataType::LargeBinary => {
109            let list = array.as_binary::<i64>();
110            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
111        }
112        DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
113            Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
114        )),
115        DataType::BinaryView => {
116            let list = array.as_binary_view();
117            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
118            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
119                v.into(),
120                list.nulls().cloned(),
121            )?))
122        }
123        other => Err(ArrowError::ComputeError(format!(
124            "length not supported for {other:?}"
125        ))),
126    }
127}
128
129/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
130///
131/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, StringViewArray/Utf8View,
132///   BinaryArray, LargeBinaryArray, BinaryViewArray, and FixedSizeBinaryArray,
133///   or DictionaryArray/REE with above Arrays as values
134/// * bit_length of null is null.
135/// * bit_length is in number of bits
136pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
137    if let Some(d) = array.as_any_dictionary_opt() {
138        let lengths = bit_length(d.values().as_ref())?;
139        return Ok(d.with_values(lengths));
140    }
141    if let Some(ree) = array.as_any_ree_opt() {
142        let lengths = bit_length(ree.values())?;
143        return Ok(ree.with_values(lengths));
144    }
145
146    match array.data_type() {
147        DataType::Utf8 => {
148            let list = array.as_string::<i32>();
149            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
150        }
151        DataType::LargeUtf8 => {
152            let list = array.as_string::<i64>();
153            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
154        }
155        DataType::Utf8View => {
156            let list = array.as_string_view();
157            let values = list
158                .views()
159                .iter()
160                .map(|view| (*view as i32).wrapping_mul(8))
161                .collect();
162            Ok(Arc::new(Int32Array::try_new(
163                values,
164                array.nulls().cloned(),
165            )?))
166        }
167        DataType::Binary => {
168            let list = array.as_binary::<i32>();
169            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
170        }
171        DataType::LargeBinary => {
172            let list = array.as_binary::<i64>();
173            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
174        }
175        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
176            vec![*len * 8; array.len()].into(),
177            array.nulls().cloned(),
178        )?)),
179        DataType::BinaryView => {
180            let list = array.as_binary_view();
181            let values = list
182                .views()
183                .iter()
184                .map(|view| (*view as i32).wrapping_mul(8))
185                .collect();
186            Ok(Arc::new(Int32Array::try_new(
187                values,
188                array.nulls().cloned(),
189            )?))
190        }
191        other => Err(ArrowError::ComputeError(format!(
192            "bit_length not supported for {other:?}"
193        ))),
194    }
195}
196
197#[cfg(test)]
198mod tests {
199    use super::*;
200    use arrow_buffer::{Buffer, ScalarBuffer};
201    use arrow_data::ArrayData;
202    use arrow_schema::Field;
203
204    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
205        // a large array
206        let values = [
207            "one",
208            "on",
209            "o",
210            "",
211            "this is a longer string to test string array with",
212        ];
213        let values = values.into_iter().cycle().take(4096).collect();
214        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();
215
216        vec![
217            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
218            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
219            (vec!["💖"], 1, vec![4]),
220            (values, 4096, expected),
221        ]
222    }
223
224    macro_rules! length_binary_helper {
225        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
226            let array = GenericBinaryArray::<$offset_ty>::from($value);
227            let result = $kernel(&array).unwrap();
228            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
229            let expected: $result_ty = $expected.into();
230            assert_eq!(&expected, result);
231        }};
232    }
233
234    macro_rules! length_list_helper {
235        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
236            let array =
237                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
238            let result = length(&array).unwrap();
239            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
240            let expected: $result_ty = $expected.into();
241            assert_eq!(&expected, result);
242        }};
243    }
244
245    #[test]
246    fn length_test_string() {
247        length_cases_string()
248            .into_iter()
249            .for_each(|(input, len, expected)| {
250                let array = StringArray::from(input);
251                let result = length(&array).unwrap();
252                assert_eq!(len, result.len());
253                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
254                expected.iter().enumerate().for_each(|(i, value)| {
255                    assert_eq!(*value, result.value(i));
256                });
257            })
258    }
259
260    #[test]
261    fn length_test_large_string() {
262        length_cases_string()
263            .into_iter()
264            .for_each(|(input, len, expected)| {
265                let array = LargeStringArray::from(input);
266                let result = length(&array).unwrap();
267                assert_eq!(len, result.len());
268                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
269                expected.iter().enumerate().for_each(|(i, value)| {
270                    assert_eq!(*value as i64, result.value(i));
271                });
272            })
273    }
274
275    #[test]
276    fn length_test_string_view() {
277        length_cases_string()
278            .into_iter()
279            .for_each(|(input, len, expected)| {
280                let array = StringViewArray::from(input);
281                let result = length(&array).unwrap();
282                assert_eq!(len, result.len());
283                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
284                expected.iter().enumerate().for_each(|(i, value)| {
285                    assert_eq!(*value, result.value(i));
286                });
287            })
288    }
289
290    #[test]
291    fn length_test_binary() {
292        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
293        let result: Vec<i32> = vec![4, 3, 2];
294        length_binary_helper!(i32, Int32Array, length, value, result)
295    }
296
297    #[test]
298    fn length_test_large_binary() {
299        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
300        let result: Vec<i64> = vec![4, 2, 3];
301        length_binary_helper!(i64, Int64Array, length, value, result)
302    }
303
304    #[test]
305    fn length_test_binary_view() {
306        let value: Vec<&[u8]> = vec![
307            b"zero",
308            &[0xff, 0xf8],
309            b"two",
310            b"this is a longer string to test binary array with",
311        ];
312        let expected: Vec<i32> = vec![4, 2, 3, 49];
313
314        let array = BinaryViewArray::from(value);
315        let result = length(&array).unwrap();
316        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
317        let expected: Int32Array = expected.into();
318        assert_eq!(&expected, result);
319    }
320
321    #[test]
322    fn length_test_list() {
323        let value = vec![
324            Some(vec![]),
325            Some(vec![Some(1), Some(2), Some(4)]),
326            Some(vec![Some(0)]),
327        ];
328        let result: Vec<i32> = vec![0, 3, 1];
329        length_list_helper!(i32, Int32Array, Int32Type, value, result)
330    }
331
332    #[test]
333    fn length_test_large_list() {
334        let value = vec![
335            Some(vec![]),
336            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
337            Some(vec![None]),
338        ];
339        let result: Vec<i64> = vec![0, 3, 1];
340        length_list_helper!(i64, Int64Array, Float32Type, value, result)
341    }
342
343    type OptionStr = Option<&'static str>;
344
345    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
346        vec![(
347            vec![Some("one"), None, Some("three"), Some("four")],
348            4,
349            vec![Some(3), None, Some(5), Some(4)],
350        )]
351    }
352
353    #[test]
354    fn length_null_string() {
355        length_null_cases_string()
356            .into_iter()
357            .for_each(|(input, len, expected)| {
358                let array = StringArray::from(input);
359                let result = length(&array).unwrap();
360                assert_eq!(len, result.len());
361                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
362
363                let expected: Int32Array = expected.into();
364                assert_eq!(&expected, result);
365            })
366    }
367
368    #[test]
369    fn length_null_large_string() {
370        length_null_cases_string()
371            .into_iter()
372            .for_each(|(input, len, expected)| {
373                let array = LargeStringArray::from(input);
374                let result = length(&array).unwrap();
375                assert_eq!(len, result.len());
376                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
377
378                // convert to i64
379                let expected: Int64Array = expected
380                    .iter()
381                    .map(|e| e.map(|e| e as i64))
382                    .collect::<Vec<_>>()
383                    .into();
384                assert_eq!(&expected, result);
385            })
386    }
387
388    #[test]
389    fn length_null_binary() {
390        let value: Vec<Option<&[u8]>> =
391            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
392        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
393        length_binary_helper!(i32, Int32Array, length, value, result)
394    }
395
396    #[test]
397    fn length_null_large_binary() {
398        let value: Vec<Option<&[u8]>> =
399            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
400        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
401        length_binary_helper!(i64, Int64Array, length, value, result)
402    }
403
404    #[test]
405    fn length_null_list() {
406        let value = vec![
407            Some(vec![]),
408            None,
409            Some(vec![Some(1), None, Some(2), Some(4)]),
410            Some(vec![Some(0)]),
411        ];
412        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
413        length_list_helper!(i32, Int32Array, Int8Type, value, result)
414    }
415
416    #[test]
417    fn length_null_large_list() {
418        let value = vec![
419            Some(vec![]),
420            None,
421            Some(vec![Some(1.1), None, Some(4.0)]),
422            Some(vec![Some(0.1)]),
423        ];
424        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
425        length_list_helper!(i64, Int64Array, Float32Type, value, result)
426    }
427
428    #[test]
429    fn length_test_list_view() {
430        // Create a ListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
431        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
432        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
433        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
434        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
435        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
436
437        let result = length(&list_array).unwrap();
438        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
439        let expected: Int32Array = vec![3, 3, 2].into();
440        assert_eq!(&expected, result);
441    }
442
443    #[test]
444    fn length_test_large_list_view() {
445        // Create a LargeListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
446        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
447        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
448        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
449        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
450        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
451
452        let result = length(&list_array).unwrap();
453        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
454        let expected: Int64Array = vec![3i64, 3, 2].into();
455        assert_eq!(&expected, result);
456    }
457
458    #[test]
459    fn length_null_list_view() {
460        // Create a ListViewArray with nulls: [], null, [1, 2, 3, 4], [0]
461        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
462        let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
463        let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
464        let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
465        let nulls = NullBuffer::from(vec![true, false, true, true]);
466        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
467
468        let result = length(&list_array).unwrap();
469        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
470        let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
471        assert_eq!(&expected, result);
472    }
473
474    #[test]
475    fn length_null_large_list_view() {
476        // Create a LargeListViewArray with nulls: [], null, [1.0, 2.0, 3.0], [0.1]
477        let field = Arc::new(Field::new_list_field(DataType::Float32, true));
478        let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
479        let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
480        let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
481        let nulls = NullBuffer::from(vec![true, false, true, true]);
482        let list_array =
483            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
484
485        let result = length(&list_array).unwrap();
486        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
487        let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
488        assert_eq!(&expected, result);
489    }
490
491    /// Tests that length is not valid for u64.
492    #[test]
493    fn length_wrong_type() {
494        let array: UInt64Array = vec![1u64].into();
495
496        assert!(length(&array).is_err());
497    }
498
499    /// Tests with an offset
500    #[test]
501    fn length_offsets_string() {
502        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
503        let b = a.slice(1, 3);
504        let result = length(&b).unwrap();
505        let result: &Int32Array = result.as_primitive();
506
507        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
508        assert_eq!(&expected, result);
509    }
510
511    #[test]
512    fn length_offsets_binary() {
513        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
514        let a = BinaryArray::from(value);
515        let b = a.slice(1, 3);
516        let result = length(&b).unwrap();
517        let result: &Int32Array = result.as_primitive();
518
519        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
520        assert_eq!(&expected, result);
521    }
522
523    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
524        // a large array
525        let values = ["one", "on", "o", ""];
526        let values = values.into_iter().cycle().take(4096).collect();
527        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();
528
529        vec![
530            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
531            (vec!["💖"], 1, vec![32]),
532            (vec!["josé"], 1, vec![40]),
533            (values, 4096, expected),
534        ]
535    }
536
537    #[test]
538    fn bit_length_test_string() {
539        bit_length_cases()
540            .into_iter()
541            .for_each(|(input, len, expected)| {
542                let array = StringArray::from(input);
543                let result = bit_length(&array).unwrap();
544                assert_eq!(len, result.len());
545                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
546                expected.iter().enumerate().for_each(|(i, value)| {
547                    assert_eq!(*value, result.value(i));
548                });
549            })
550    }
551
552    #[test]
553    fn bit_length_test_large_string() {
554        bit_length_cases()
555            .into_iter()
556            .for_each(|(input, len, expected)| {
557                let array = LargeStringArray::from(input);
558                let result = bit_length(&array).unwrap();
559                assert_eq!(len, result.len());
560                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
561                expected.iter().enumerate().for_each(|(i, value)| {
562                    assert_eq!(*value as i64, result.value(i));
563                });
564            })
565    }
566
567    #[test]
568    fn bit_length_test_utf8view() {
569        bit_length_cases()
570            .into_iter()
571            .for_each(|(input, len, expected)| {
572                let string_array = StringViewArray::from(input);
573                let result = bit_length(&string_array).unwrap();
574                assert_eq!(len, result.len());
575                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
576                expected.iter().enumerate().for_each(|(i, value)| {
577                    assert_eq!(*value, result.value(i));
578                });
579            })
580    }
581
582    #[test]
583    fn bit_length_null_utf8view() {
584        bit_length_null_cases()
585            .into_iter()
586            .for_each(|(input, len, expected)| {
587                let array = StringArray::from(input);
588                let result = bit_length(&array).unwrap();
589                assert_eq!(len, result.len());
590                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
591
592                let expected: Int32Array = expected.into();
593                assert_eq!(&expected, result);
594            })
595    }
596    #[test]
597    fn bit_length_binary() {
598        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
599        let expected: Vec<i32> = vec![24, 16, 40];
600        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
601    }
602
603    #[test]
604    fn bit_length_large_binary() {
605        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
606        let expected: Vec<i64> = vec![32, 8, 16];
607        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
608    }
609
610    #[test]
611    fn bit_length_binary_view() {
612        let value: Vec<&[u8]> = vec![
613            b"zero",
614            &[0xff, 0xf8],
615            b"two",
616            b"this is a longer string to test binary array with",
617        ];
618        let expected: Vec<i32> = vec![32, 16, 24, 392];
619
620        let array = BinaryViewArray::from(value);
621        let result = bit_length(&array).unwrap();
622        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
623        let expected: Int32Array = expected.into();
624        assert_eq!(&expected, result);
625    }
626
627    #[test]
628    fn bit_length_null_binary_view() {
629        let value: Vec<Option<&[u8]>> =
630            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
631        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
632
633        let array = BinaryViewArray::from(value);
634        let result = bit_length(&array).unwrap();
635        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
636        let expected: Int32Array = expected.into();
637        assert_eq!(&expected, result);
638    }
639
640    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
641        vec![(
642            vec![Some("one"), None, Some("three"), Some("four")],
643            4,
644            vec![Some(24), None, Some(40), Some(32)],
645        )]
646    }
647
648    #[test]
649    fn bit_length_null_string() {
650        bit_length_null_cases()
651            .into_iter()
652            .for_each(|(input, len, expected)| {
653                let array = StringArray::from(input);
654                let result = bit_length(&array).unwrap();
655                assert_eq!(len, result.len());
656                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
657
658                let expected: Int32Array = expected.into();
659                assert_eq!(&expected, result);
660            })
661    }
662
663    #[test]
664    fn bit_length_null_large_string() {
665        bit_length_null_cases()
666            .into_iter()
667            .for_each(|(input, len, expected)| {
668                let array = LargeStringArray::from(input);
669                let result = bit_length(&array).unwrap();
670                assert_eq!(len, result.len());
671                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
672
673                // convert to i64
674                let expected: Int64Array = expected
675                    .iter()
676                    .map(|e| e.map(|e| e as i64))
677                    .collect::<Vec<_>>()
678                    .into();
679                assert_eq!(&expected, result);
680            })
681    }
682
683    #[test]
684    fn bit_length_null_binary() {
685        let value: Vec<Option<&[u8]>> =
686            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
687        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
688        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
689    }
690
691    #[test]
692    fn bit_length_null_large_binary() {
693        let value: Vec<Option<&[u8]>> =
694            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
695        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
696        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
697    }
698
699    /// Tests that bit_length is not valid for u64.
700    #[test]
701    fn bit_length_wrong_type() {
702        let array: UInt64Array = vec![1u64].into();
703
704        assert!(bit_length(&array).is_err());
705    }
706
707    /// Tests with an offset
708    #[test]
709    fn bit_length_offsets_string() {
710        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
711        let b = a.slice(1, 3);
712        let result = bit_length(&b).unwrap();
713        let result: &Int32Array = result.as_primitive();
714
715        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
716        assert_eq!(&expected, result);
717    }
718
719    #[test]
720    fn bit_length_offsets_binary() {
721        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
722        let a = BinaryArray::from(value);
723        let b = a.slice(1, 3);
724        let result = bit_length(&b).unwrap();
725        let result: &Int32Array = result.as_primitive();
726
727        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
728        assert_eq!(&expected, result);
729    }
730
731    #[test]
732    fn length_dictionary() {
733        _length_dictionary::<Int8Type>();
734        _length_dictionary::<Int16Type>();
735        _length_dictionary::<Int32Type>();
736        _length_dictionary::<Int64Type>();
737        _length_dictionary::<UInt8Type>();
738        _length_dictionary::<UInt16Type>();
739        _length_dictionary::<UInt32Type>();
740        _length_dictionary::<UInt64Type>();
741    }
742
743    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
744        const TOTAL: i32 = 100;
745
746        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
747        let data: Vec<Option<&str>> = (0..TOTAL)
748            .map(|n| {
749                let i = n % 5;
750                if i == 3 { None } else { Some(v[i as usize]) }
751            })
752            .collect();
753
754        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
755
756        let expected: Vec<Option<i32>> =
757            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();
758
759        let res = length(&dict_array).unwrap();
760        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
761        let actual: Vec<Option<i32>> = actual
762            .values()
763            .as_any()
764            .downcast_ref::<Int32Array>()
765            .unwrap()
766            .take_iter(dict_array.keys_iter())
767            .collect();
768
769        for i in 0..TOTAL as usize {
770            assert_eq!(expected[i], actual[i],);
771        }
772    }
773
774    #[test]
775    fn bit_length_dictionary() {
776        _bit_length_dictionary::<Int8Type>();
777        _bit_length_dictionary::<Int16Type>();
778        _bit_length_dictionary::<Int32Type>();
779        _bit_length_dictionary::<Int64Type>();
780        _bit_length_dictionary::<UInt8Type>();
781        _bit_length_dictionary::<UInt16Type>();
782        _bit_length_dictionary::<UInt32Type>();
783        _bit_length_dictionary::<UInt64Type>();
784    }
785
786    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
787        const TOTAL: i32 = 100;
788
789        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
790        let data: Vec<Option<&str>> = (0..TOTAL)
791            .map(|n| {
792                let i = n % 5;
793                if i == 3 { None } else { Some(v[i as usize]) }
794            })
795            .collect();
796
797        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
798
799        let expected: Vec<Option<i32>> = data
800            .iter()
801            .map(|opt| opt.map(|s| (s.chars().count() * 8) as i32))
802            .collect();
803
804        let res = bit_length(&dict_array).unwrap();
805        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
806        let actual: Vec<Option<i32>> = actual
807            .values()
808            .as_any()
809            .downcast_ref::<Int32Array>()
810            .unwrap()
811            .take_iter(dict_array.keys_iter())
812            .collect();
813
814        for i in 0..TOTAL as usize {
815            assert_eq!(expected[i], actual[i],);
816        }
817    }
818
/// `length` on a fixed-size list array reports the list width for valid slots
/// and keeps null slots null.
#[test]
fn test_fixed_size_list_length() {
    // Child values: nine Int32 entries backing three lists of width three.
    let child = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
        .build()
        .unwrap();

    // Three list slots; the middle one is marked null.
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    let validity = NullBuffer::from(vec![true, false, true]);
    let array_data = ArrayData::builder(DataType::FixedSizeList(item_field, 3))
        .len(3)
        .add_child_data(child)
        .nulls(Some(validity))
        .build()
        .unwrap();
    let input = FixedSizeListArray::from(array_data);

    let output = length(&input).unwrap();
    let output = output.as_primitive::<Int32Type>();

    // `Some(n)` checks the stored value, `None` checks the slot is null.
    let expected = [Some(3), None, Some(3)];
    assert_eq!(output.len(), 3);
    for (slot, want) in expected.into_iter().enumerate() {
        match want {
            Some(n) => assert_eq!(output.value(slot), n),
            None => assert!(output.is_null(slot)),
        }
    }
}
846
/// `length` and `bit_length` on a fixed-size binary array report the fixed
/// width (in bytes and bits respectively) for every element.
#[test]
fn test_fixed_size_binary() {
    // Sixteen zero bytes split into four values of width four, no null buffer.
    let input = FixedSizeBinaryArray::new(4, [0; 16].into(), None);

    let byte_lengths = length(&input).unwrap();
    let expected_bytes = Int32Array::from(vec![4; 4]);
    assert_eq!(byte_lengths.as_ref(), &expected_bytes);

    let bit_lengths = bit_length(&input).unwrap();
    let expected_bits = Int32Array::from(vec![32; 4]);
    assert_eq!(bit_lengths.as_ref(), &expected_bits);
}
/// `length` over a run-end-encoded string array yields a run array whose
/// values are the lengths of the encoded strings.
#[test]
fn length_test_ree_string_values() {
    use arrow_array::{RunArray, types::Int32Type};

    let words = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
    let ends = Int32Array::from(vec![2, 5, 9, 11, 14]);
    let input = RunArray::<Int32Type>::try_new(&ends, &words).unwrap();

    let output = length(&input).unwrap();
    // Unwrap the run array, then its values child, in one chain.
    let output_values = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap()
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    let expected = Int32Array::from(vec![5, 3, 4, 5, 1]);
    assert_eq!(&expected, output_values);
}
/// `length` returns an error for a run-end-encoded array whose values are
/// `UInt64`.
#[test]
fn length_test_ree_invalid_type_early_fail() {
    use arrow_array::{RunArray, types::Int32Type};

    let numbers = UInt64Array::from(vec![1u64, 2, 3]);
    let ends = Int32Array::from(vec![1, 2, 3]);
    let input = RunArray::<Int32Type>::try_new(&ends, &numbers).unwrap();

    let outcome = length(&input);
    assert!(outcome.is_err());
}
891
/// `bit_length` over a run-end-encoded UTF-8 array yields a run array whose
/// values are the bit lengths of the encoded strings.
#[test]
fn bit_length_test_ree_utf8() {
    use arrow_array::{RunArray, types::Int32Type};

    let words = StringArray::from(vec!["hello", "world", "test"]);
    let ends = Int32Array::from(vec![1, 2, 3]);
    let input = RunArray::<Int32Type>::try_new(&ends, &words).unwrap();

    let output = bit_length(&input).unwrap();
    // Step through the result: first the run array, then its values child.
    let output_runs = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();
    let output_values = output_runs
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    let expected = Int32Array::from(vec![40, 40, 32]);
    assert_eq!(&expected, output_values);
}
914}