Skip to main content

arrow_string/
length.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
//! Defines kernels for computing the length and bit length of string, binary and list arrays
19
20use arrow_array::ree_map;
21use arrow_array::*;
22use arrow_array::{cast::AsArray, types::*};
23use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
24use arrow_schema::{ArrowError, DataType};
25use std::sync::Arc;
26
27fn length_impl<P: ArrowPrimitiveType>(
28    offsets: &OffsetBuffer<P::Native>,
29    nulls: Option<&NullBuffer>,
30) -> ArrayRef {
31    let v: Vec<_> = offsets
32        .windows(2)
33        .map(|w| w[1].sub_wrapping(w[0]))
34        .collect();
35    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
36}
37
38fn bit_length_impl<P: ArrowPrimitiveType>(
39    offsets: &OffsetBuffer<P::Native>,
40    nulls: Option<&NullBuffer>,
41) -> ArrayRef {
42    let bits = P::Native::usize_as(8);
43    let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
44    let v: Vec<_> = offsets.windows(2).map(c).collect();
45    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
46}
47
48/// Returns an array of Int32/Int64 denoting the length of each value in the array.
49///
50/// For list array, length is the number of elements in each list.
51/// For string array and binary array, length is the number of bytes of each value.
52///
53/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, FixedSizeListArray,
54///   and ListViewArray/LargeListViewArray, or DictionaryArray with above Arrays as values, or
55///   RunEndEncoded arrays with above arrays as values
56/// * length of null is null.
57pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
58    if let Some(d) = array.as_any_dictionary_opt() {
59        let lengths = length(d.values().as_ref())?;
60        return Ok(d.with_values(lengths));
61    }
62    match array.data_type() {
63        DataType::List(_) => {
64            let list = array.as_list::<i32>();
65            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
66        }
67        DataType::LargeList(_) => {
68            let list = array.as_list::<i64>();
69            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
70        }
71        DataType::ListView(_) => {
72            let list = array.as_list_view::<i32>();
73            Ok(Arc::new(Int32Array::new(
74                list.sizes().clone(),
75                list.nulls().cloned(),
76            )))
77        }
78        DataType::LargeListView(_) => {
79            let list = array.as_list_view::<i64>();
80            Ok(Arc::new(Int64Array::new(
81                list.sizes().clone(),
82                list.nulls().cloned(),
83            )))
84        }
85        DataType::Utf8 => {
86            let list = array.as_string::<i32>();
87            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
88        }
89        DataType::LargeUtf8 => {
90            let list = array.as_string::<i64>();
91            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
92        }
93        DataType::Utf8View => {
94            let list = array.as_string_view();
95            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
96            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
97                v.into(),
98                list.nulls().cloned(),
99            )?))
100        }
101        DataType::Binary => {
102            let list = array.as_binary::<i32>();
103            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
104        }
105        DataType::LargeBinary => {
106            let list = array.as_binary::<i64>();
107            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
108        }
109        DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
110            Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
111        )),
112        DataType::BinaryView => {
113            let list = array.as_binary_view();
114            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
115            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
116                v.into(),
117                list.nulls().cloned(),
118            )?))
119        }
120        DataType::RunEndEncoded(k, _) => match k.data_type() {
121            DataType::Int16 => ree_map!(array, Int16Type, length),
122            DataType::Int32 => ree_map!(array, Int32Type, length),
123            DataType::Int64 => ree_map!(array, Int64Type, length),
124            _ => Err(ArrowError::InvalidArgumentError(format!(
125                "Invalid run-end type: {:?}",
126                k.data_type()
127            ))),
128        },
129        other => Err(ArrowError::ComputeError(format!(
130            "length not supported for {other:?}"
131        ))),
132    }
133}
134
135/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
136///
137/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, StringViewArray/Utf8View,
138///   BinaryArray, LargeBinaryArray, BinaryViewArray, and FixedSizeBinaryArray,
139///   or DictionaryArray/REE with above Arrays as values
140/// * bit_length of null is null.
141/// * bit_length is in number of bits
142pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
143    if let Some(d) = array.as_any_dictionary_opt() {
144        let lengths = bit_length(d.values().as_ref())?;
145        return Ok(d.with_values(lengths));
146    }
147
148    match array.data_type() {
149        DataType::Utf8 => {
150            let list = array.as_string::<i32>();
151            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
152        }
153        DataType::LargeUtf8 => {
154            let list = array.as_string::<i64>();
155            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
156        }
157        DataType::Utf8View => {
158            let list = array.as_string_view();
159            let values = list
160                .views()
161                .iter()
162                .map(|view| (*view as i32).wrapping_mul(8))
163                .collect();
164            Ok(Arc::new(Int32Array::try_new(
165                values,
166                array.nulls().cloned(),
167            )?))
168        }
169        DataType::Binary => {
170            let list = array.as_binary::<i32>();
171            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
172        }
173        DataType::LargeBinary => {
174            let list = array.as_binary::<i64>();
175            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
176        }
177        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
178            vec![*len * 8; array.len()].into(),
179            array.nulls().cloned(),
180        )?)),
181        DataType::BinaryView => {
182            let list = array.as_binary_view();
183            let values = list
184                .views()
185                .iter()
186                .map(|view| (*view as i32).wrapping_mul(8))
187                .collect();
188            Ok(Arc::new(Int32Array::try_new(
189                values,
190                array.nulls().cloned(),
191            )?))
192        }
193        DataType::RunEndEncoded(k, _) => match k.data_type() {
194            DataType::Int16 => ree_map!(array, Int16Type, bit_length),
195            DataType::Int32 => ree_map!(array, Int32Type, bit_length),
196            DataType::Int64 => ree_map!(array, Int64Type, bit_length),
197            _ => Err(ArrowError::InvalidArgumentError(format!(
198                "Invalid run-end type: {:?}",
199                k.data_type()
200            ))),
201        },
202        other => Err(ArrowError::ComputeError(format!(
203            "bit_length not supported for {other:?}"
204        ))),
205    }
206}
207
208#[cfg(test)]
209mod tests {
210    use super::*;
211    use arrow_buffer::{Buffer, ScalarBuffer};
212    use arrow_data::ArrayData;
213    use arrow_schema::Field;
214
215    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
216        // a large array
217        let values = [
218            "one",
219            "on",
220            "o",
221            "",
222            "this is a longer string to test string array with",
223        ];
224        let values = values.into_iter().cycle().take(4096).collect();
225        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();
226
227        vec![
228            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
229            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
230            (vec!["💖"], 1, vec![4]),
231            (values, 4096, expected),
232        ]
233    }
234
235    macro_rules! length_binary_helper {
236        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
237            let array = GenericBinaryArray::<$offset_ty>::from($value);
238            let result = $kernel(&array).unwrap();
239            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
240            let expected: $result_ty = $expected.into();
241            assert_eq!(&expected, result);
242        }};
243    }
244
245    macro_rules! length_list_helper {
246        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
247            let array =
248                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
249            let result = length(&array).unwrap();
250            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
251            let expected: $result_ty = $expected.into();
252            assert_eq!(&expected, result);
253        }};
254    }
255
256    #[test]
257    fn length_test_string() {
258        length_cases_string()
259            .into_iter()
260            .for_each(|(input, len, expected)| {
261                let array = StringArray::from(input);
262                let result = length(&array).unwrap();
263                assert_eq!(len, result.len());
264                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
265                expected.iter().enumerate().for_each(|(i, value)| {
266                    assert_eq!(*value, result.value(i));
267                });
268            })
269    }
270
271    #[test]
272    fn length_test_large_string() {
273        length_cases_string()
274            .into_iter()
275            .for_each(|(input, len, expected)| {
276                let array = LargeStringArray::from(input);
277                let result = length(&array).unwrap();
278                assert_eq!(len, result.len());
279                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
280                expected.iter().enumerate().for_each(|(i, value)| {
281                    assert_eq!(*value as i64, result.value(i));
282                });
283            })
284    }
285
286    #[test]
287    fn length_test_string_view() {
288        length_cases_string()
289            .into_iter()
290            .for_each(|(input, len, expected)| {
291                let array = StringViewArray::from(input);
292                let result = length(&array).unwrap();
293                assert_eq!(len, result.len());
294                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
295                expected.iter().enumerate().for_each(|(i, value)| {
296                    assert_eq!(*value, result.value(i));
297                });
298            })
299    }
300
301    #[test]
302    fn length_test_binary() {
303        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
304        let result: Vec<i32> = vec![4, 3, 2];
305        length_binary_helper!(i32, Int32Array, length, value, result)
306    }
307
308    #[test]
309    fn length_test_large_binary() {
310        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
311        let result: Vec<i64> = vec![4, 2, 3];
312        length_binary_helper!(i64, Int64Array, length, value, result)
313    }
314
315    #[test]
316    fn length_test_binary_view() {
317        let value: Vec<&[u8]> = vec![
318            b"zero",
319            &[0xff, 0xf8],
320            b"two",
321            b"this is a longer string to test binary array with",
322        ];
323        let expected: Vec<i32> = vec![4, 2, 3, 49];
324
325        let array = BinaryViewArray::from(value);
326        let result = length(&array).unwrap();
327        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
328        let expected: Int32Array = expected.into();
329        assert_eq!(&expected, result);
330    }
331
332    #[test]
333    fn length_test_list() {
334        let value = vec![
335            Some(vec![]),
336            Some(vec![Some(1), Some(2), Some(4)]),
337            Some(vec![Some(0)]),
338        ];
339        let result: Vec<i32> = vec![0, 3, 1];
340        length_list_helper!(i32, Int32Array, Int32Type, value, result)
341    }
342
343    #[test]
344    fn length_test_large_list() {
345        let value = vec![
346            Some(vec![]),
347            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
348            Some(vec![None]),
349        ];
350        let result: Vec<i64> = vec![0, 3, 1];
351        length_list_helper!(i64, Int64Array, Float32Type, value, result)
352    }
353
354    type OptionStr = Option<&'static str>;
355
356    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
357        vec![(
358            vec![Some("one"), None, Some("three"), Some("four")],
359            4,
360            vec![Some(3), None, Some(5), Some(4)],
361        )]
362    }
363
364    #[test]
365    fn length_null_string() {
366        length_null_cases_string()
367            .into_iter()
368            .for_each(|(input, len, expected)| {
369                let array = StringArray::from(input);
370                let result = length(&array).unwrap();
371                assert_eq!(len, result.len());
372                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
373
374                let expected: Int32Array = expected.into();
375                assert_eq!(&expected, result);
376            })
377    }
378
379    #[test]
380    fn length_null_large_string() {
381        length_null_cases_string()
382            .into_iter()
383            .for_each(|(input, len, expected)| {
384                let array = LargeStringArray::from(input);
385                let result = length(&array).unwrap();
386                assert_eq!(len, result.len());
387                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
388
389                // convert to i64
390                let expected: Int64Array = expected
391                    .iter()
392                    .map(|e| e.map(|e| e as i64))
393                    .collect::<Vec<_>>()
394                    .into();
395                assert_eq!(&expected, result);
396            })
397    }
398
399    #[test]
400    fn length_null_binary() {
401        let value: Vec<Option<&[u8]>> =
402            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
403        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
404        length_binary_helper!(i32, Int32Array, length, value, result)
405    }
406
407    #[test]
408    fn length_null_large_binary() {
409        let value: Vec<Option<&[u8]>> =
410            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
411        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
412        length_binary_helper!(i64, Int64Array, length, value, result)
413    }
414
415    #[test]
416    fn length_null_list() {
417        let value = vec![
418            Some(vec![]),
419            None,
420            Some(vec![Some(1), None, Some(2), Some(4)]),
421            Some(vec![Some(0)]),
422        ];
423        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
424        length_list_helper!(i32, Int32Array, Int8Type, value, result)
425    }
426
427    #[test]
428    fn length_null_large_list() {
429        let value = vec![
430            Some(vec![]),
431            None,
432            Some(vec![Some(1.1), None, Some(4.0)]),
433            Some(vec![Some(0.1)]),
434        ];
435        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
436        length_list_helper!(i64, Int64Array, Float32Type, value, result)
437    }
438
439    #[test]
440    fn length_test_list_view() {
441        // Create a ListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
442        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
443        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
444        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
445        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
446        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
447
448        let result = length(&list_array).unwrap();
449        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
450        let expected: Int32Array = vec![3, 3, 2].into();
451        assert_eq!(&expected, result);
452    }
453
454    #[test]
455    fn length_test_large_list_view() {
456        // Create a LargeListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
457        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
458        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
459        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
460        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
461        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
462
463        let result = length(&list_array).unwrap();
464        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
465        let expected: Int64Array = vec![3i64, 3, 2].into();
466        assert_eq!(&expected, result);
467    }
468
469    #[test]
470    fn length_null_list_view() {
471        // Create a ListViewArray with nulls: [], null, [1, 2, 3, 4], [0]
472        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
473        let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
474        let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
475        let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
476        let nulls = NullBuffer::from(vec![true, false, true, true]);
477        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
478
479        let result = length(&list_array).unwrap();
480        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
481        let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
482        assert_eq!(&expected, result);
483    }
484
485    #[test]
486    fn length_null_large_list_view() {
487        // Create a LargeListViewArray with nulls: [], null, [1.0, 2.0, 3.0], [0.1]
488        let field = Arc::new(Field::new_list_field(DataType::Float32, true));
489        let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
490        let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
491        let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
492        let nulls = NullBuffer::from(vec![true, false, true, true]);
493        let list_array =
494            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
495
496        let result = length(&list_array).unwrap();
497        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
498        let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
499        assert_eq!(&expected, result);
500    }
501
502    /// Tests that length is not valid for u64.
503    #[test]
504    fn length_wrong_type() {
505        let array: UInt64Array = vec![1u64].into();
506
507        assert!(length(&array).is_err());
508    }
509
510    /// Tests with an offset
511    #[test]
512    fn length_offsets_string() {
513        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
514        let b = a.slice(1, 3);
515        let result = length(&b).unwrap();
516        let result: &Int32Array = result.as_primitive();
517
518        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
519        assert_eq!(&expected, result);
520    }
521
522    #[test]
523    fn length_offsets_binary() {
524        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
525        let a = BinaryArray::from(value);
526        let b = a.slice(1, 3);
527        let result = length(&b).unwrap();
528        let result: &Int32Array = result.as_primitive();
529
530        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
531        assert_eq!(&expected, result);
532    }
533
534    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
535        // a large array
536        let values = ["one", "on", "o", ""];
537        let values = values.into_iter().cycle().take(4096).collect();
538        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();
539
540        vec![
541            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
542            (vec!["💖"], 1, vec![32]),
543            (vec!["josé"], 1, vec![40]),
544            (values, 4096, expected),
545        ]
546    }
547
548    #[test]
549    fn bit_length_test_string() {
550        bit_length_cases()
551            .into_iter()
552            .for_each(|(input, len, expected)| {
553                let array = StringArray::from(input);
554                let result = bit_length(&array).unwrap();
555                assert_eq!(len, result.len());
556                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
557                expected.iter().enumerate().for_each(|(i, value)| {
558                    assert_eq!(*value, result.value(i));
559                });
560            })
561    }
562
563    #[test]
564    fn bit_length_test_large_string() {
565        bit_length_cases()
566            .into_iter()
567            .for_each(|(input, len, expected)| {
568                let array = LargeStringArray::from(input);
569                let result = bit_length(&array).unwrap();
570                assert_eq!(len, result.len());
571                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
572                expected.iter().enumerate().for_each(|(i, value)| {
573                    assert_eq!(*value as i64, result.value(i));
574                });
575            })
576    }
577
578    #[test]
579    fn bit_length_test_utf8view() {
580        bit_length_cases()
581            .into_iter()
582            .for_each(|(input, len, expected)| {
583                let string_array = StringViewArray::from(input);
584                let result = bit_length(&string_array).unwrap();
585                assert_eq!(len, result.len());
586                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
587                expected.iter().enumerate().for_each(|(i, value)| {
588                    assert_eq!(*value, result.value(i));
589                });
590            })
591    }
592
593    #[test]
594    fn bit_length_null_utf8view() {
595        bit_length_null_cases()
596            .into_iter()
597            .for_each(|(input, len, expected)| {
598                let array = StringArray::from(input);
599                let result = bit_length(&array).unwrap();
600                assert_eq!(len, result.len());
601                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
602
603                let expected: Int32Array = expected.into();
604                assert_eq!(&expected, result);
605            })
606    }
607    #[test]
608    fn bit_length_binary() {
609        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
610        let expected: Vec<i32> = vec![24, 16, 40];
611        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
612    }
613
614    #[test]
615    fn bit_length_large_binary() {
616        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
617        let expected: Vec<i64> = vec![32, 8, 16];
618        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
619    }
620
621    #[test]
622    fn bit_length_binary_view() {
623        let value: Vec<&[u8]> = vec![
624            b"zero",
625            &[0xff, 0xf8],
626            b"two",
627            b"this is a longer string to test binary array with",
628        ];
629        let expected: Vec<i32> = vec![32, 16, 24, 392];
630
631        let array = BinaryViewArray::from(value);
632        let result = bit_length(&array).unwrap();
633        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
634        let expected: Int32Array = expected.into();
635        assert_eq!(&expected, result);
636    }
637
638    #[test]
639    fn bit_length_null_binary_view() {
640        let value: Vec<Option<&[u8]>> =
641            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
642        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
643
644        let array = BinaryViewArray::from(value);
645        let result = bit_length(&array).unwrap();
646        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
647        let expected: Int32Array = expected.into();
648        assert_eq!(&expected, result);
649    }
650
651    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
652        vec![(
653            vec![Some("one"), None, Some("three"), Some("four")],
654            4,
655            vec![Some(24), None, Some(40), Some(32)],
656        )]
657    }
658
659    #[test]
660    fn bit_length_null_string() {
661        bit_length_null_cases()
662            .into_iter()
663            .for_each(|(input, len, expected)| {
664                let array = StringArray::from(input);
665                let result = bit_length(&array).unwrap();
666                assert_eq!(len, result.len());
667                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
668
669                let expected: Int32Array = expected.into();
670                assert_eq!(&expected, result);
671            })
672    }
673
674    #[test]
675    fn bit_length_null_large_string() {
676        bit_length_null_cases()
677            .into_iter()
678            .for_each(|(input, len, expected)| {
679                let array = LargeStringArray::from(input);
680                let result = bit_length(&array).unwrap();
681                assert_eq!(len, result.len());
682                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
683
684                // convert to i64
685                let expected: Int64Array = expected
686                    .iter()
687                    .map(|e| e.map(|e| e as i64))
688                    .collect::<Vec<_>>()
689                    .into();
690                assert_eq!(&expected, result);
691            })
692    }
693
694    #[test]
695    fn bit_length_null_binary() {
696        let value: Vec<Option<&[u8]>> =
697            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
698        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
699        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
700    }
701
702    #[test]
703    fn bit_length_null_large_binary() {
704        let value: Vec<Option<&[u8]>> =
705            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
706        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
707        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
708    }
709
710    /// Tests that bit_length is not valid for u64.
711    #[test]
712    fn bit_length_wrong_type() {
713        let array: UInt64Array = vec![1u64].into();
714
715        assert!(bit_length(&array).is_err());
716    }
717
718    /// Tests with an offset
719    #[test]
720    fn bit_length_offsets_string() {
721        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
722        let b = a.slice(1, 3);
723        let result = bit_length(&b).unwrap();
724        let result: &Int32Array = result.as_primitive();
725
726        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
727        assert_eq!(&expected, result);
728    }
729
730    #[test]
731    fn bit_length_offsets_binary() {
732        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
733        let a = BinaryArray::from(value);
734        let b = a.slice(1, 3);
735        let result = bit_length(&b).unwrap();
736        let result: &Int32Array = result.as_primitive();
737
738        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
739        assert_eq!(&expected, result);
740    }
741
742    #[test]
743    fn length_dictionary() {
744        _length_dictionary::<Int8Type>();
745        _length_dictionary::<Int16Type>();
746        _length_dictionary::<Int32Type>();
747        _length_dictionary::<Int64Type>();
748        _length_dictionary::<UInt8Type>();
749        _length_dictionary::<UInt16Type>();
750        _length_dictionary::<UInt32Type>();
751        _length_dictionary::<UInt64Type>();
752    }
753
754    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
755        const TOTAL: i32 = 100;
756
757        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
758        let data: Vec<Option<&str>> = (0..TOTAL)
759            .map(|n| {
760                let i = n % 5;
761                if i == 3 { None } else { Some(v[i as usize]) }
762            })
763            .collect();
764
765        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
766
767        let expected: Vec<Option<i32>> =
768            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();
769
770        let res = length(&dict_array).unwrap();
771        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
772        let actual: Vec<Option<i32>> = actual
773            .values()
774            .as_any()
775            .downcast_ref::<Int32Array>()
776            .unwrap()
777            .take_iter(dict_array.keys_iter())
778            .collect();
779
780        for i in 0..TOTAL as usize {
781            assert_eq!(expected[i], actual[i],);
782        }
783    }
784
/// Runs the generic dictionary `bit_length` check once for each of the eight
/// integer key types listed below (signed first, then unsigned).
#[test]
fn bit_length_dictionary() {
    // Expands to one `_bit_length_dictionary::<K>()` call per listed key type, in order.
    macro_rules! for_each_key_type {
        ($($key:ty),+ $(,)?) => {
            $( _bit_length_dictionary::<$key>(); )+
        };
    }

    for_each_key_type!(
        Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
    );
}
796
797    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
798        const TOTAL: i32 = 100;
799
800        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
801        let data: Vec<Option<&str>> = (0..TOTAL)
802            .map(|n| {
803                let i = n % 5;
804                if i == 3 { None } else { Some(v[i as usize]) }
805            })
806            .collect();
807
808        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
809
810        let expected: Vec<Option<i32>> = data
811            .iter()
812            .map(|opt| opt.map(|s| (s.chars().count() * 8) as i32))
813            .collect();
814
815        let res = bit_length(&dict_array).unwrap();
816        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
817        let actual: Vec<Option<i32>> = actual
818            .values()
819            .as_any()
820            .downcast_ref::<Int32Array>()
821            .unwrap()
822            .take_iter(dict_array.keys_iter())
823            .collect();
824
825        for i in 0..TOTAL as usize {
826            assert_eq!(expected[i], actual[i],);
827        }
828    }
829
/// `length` on a fixed-size list array of size 3 reports 3 for every valid
/// slot and keeps the null slot null.
#[test]
fn test_fixed_size_list_length() {
    // Nine Int32 child values: three lists of three elements each.
    let child = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
        .build()
        .unwrap();

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    let list_type = DataType::FixedSizeList(item_field, 3);

    // The middle list is marked null.
    let validity = NullBuffer::from(vec![true, false, true]);
    let list_array = FixedSizeListArray::from(
        ArrayData::builder(list_type)
            .len(3)
            .add_child_data(child)
            .nulls(Some(validity))
            .build()
            .unwrap(),
    );

    let result = length(&list_array).unwrap();
    let observed: Vec<Option<i32>> = result.as_primitive::<Int32Type>().iter().collect();

    assert_eq!(observed, vec![Some(3), None, Some(3)]);
}
857
/// A fixed-size binary array of width 4 built from 16 zero bytes (four values)
/// has length 4 and bit length 32 for every value.
#[test]
fn test_fixed_size_binary() {
    let array = FixedSizeBinaryArray::new(4, [0u8; 16].into(), None);

    let expected_bytes = Int32Array::from(vec![4; 4]);
    let byte_lengths = length(&array).unwrap();
    assert_eq!(byte_lengths.as_ref(), &expected_bytes);

    let expected_bits = Int32Array::from(vec![32; 4]);
    let bit_lengths = bit_length(&array).unwrap();
    assert_eq!(bit_lengths.as_ref(), &expected_bits);
}
/// `length` on a run-end encoded string array returns a run-end encoded array
/// whose values hold the length of each run's string.
#[test]
fn length_test_ree_string_values() {
    use arrow_array::{RunArray, types::Int32Type};

    let values = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![2i32, 5, 9, 11, 14]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let lengths = length(&ree_array).unwrap();
    let lengths_ree = lengths.as_any().downcast_ref::<RunArray<Int32Type>>().unwrap();
    let per_run = lengths_ree.values().as_any().downcast_ref::<Int32Array>().unwrap();

    // One length per run value, in the same order as the input strings.
    let expected = Int32Array::from(vec![5, 3, 4, 5, 1]);
    assert_eq!(&expected, per_run);
}
/// `length` must return an error for a run-end encoded array whose values are
/// `UInt64`.
#[test]
fn length_test_ree_invalid_type_early_fail() {
    use arrow_array::{RunArray, types::Int32Type};

    let values = UInt64Array::from(vec![1u64, 2, 3]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let outcome = length(&ree_array);
    assert!(outcome.is_err());
}
902
/// `bit_length` on a run-end encoded UTF-8 array returns a run-end encoded
/// array whose values hold the bit length of each run's string.
#[test]
fn bit_length_test_ree_utf8() {
    use arrow_array::{RunArray, types::Int32Type};

    let values = StringArray::from(vec!["hello", "world", "test"]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let bits = bit_length(&ree_array).unwrap();
    let bits_ree = bits.as_any().downcast_ref::<RunArray<Int32Type>>().unwrap();
    let per_run = bits_ree.values().as_any().downcast_ref::<Int32Array>().unwrap();

    // 5, 5 and 4 bytes respectively, times 8 bits per byte.
    let expected = Int32Array::from(vec![40, 40, 32]);
    assert_eq!(&expected, per_run);
}
925}