Skip to main content

arrow_string/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines kernel for length of string arrays and binary arrays
19
20use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
/// Computes [`length`] for a run-end encoded array by applying the kernel to the
/// run values only and reusing the input's run ends unchanged.
///
/// * `$array` - the array whose data type is `RunEndEncoded`
/// * `$run_type` - the run-end primitive type matching the array's run ends
/// * `$k` / `$v` - the run-ends and values fields taken from the input data type
///
/// Expands to an expression of type `Result<ArrayRef, ArrowError>` and uses `?`,
/// so it must be invoked inside a function returning that type.
macro_rules! ree_length {
    ($array:expr, $run_type:ty, $k:expr, $v:expr) => {{
        // The caller has already matched on the run-end type before expanding this macro.
        let ree = $array
            .as_run_opt::<$run_type>()
            .expect("run-end type was matched by the caller");
        let inner_value_lengths = length(ree.values().as_ref())?;
        // The values child now holds the computed lengths rather than the original
        // values, so the values field has to describe the lengths' data type.
        // Reusing the input field would make the declared type disagree with the child.
        let values_field = arrow_schema::Field::new(
            $v.name().clone(),
            inner_value_lengths.data_type().clone(),
            $v.is_nullable(),
        );
        // SAFETY: the run ends are taken unchanged from an existing run array, the
        // values are the result of `length` over that array's values (one output per
        // input value), and the data type is built from those same two children.
        let out_ree = unsafe {
            RunArray::<$run_type>::new_unchecked(
                DataType::RunEndEncoded(Arc::clone($k), Arc::new(values_field)),
                ree.run_ends().clone(),
                inner_value_lengths,
            )
        };
        Ok(Arc::new(out_ree) as ArrayRef)
    }};
}
39
40fn length_impl<P: ArrowPrimitiveType>(
41    offsets: &OffsetBuffer<P::Native>,
42    nulls: Option<&NullBuffer>,
43) -> ArrayRef {
44    let v: Vec<_> = offsets
45        .windows(2)
46        .map(|w| w[1].sub_wrapping(w[0]))
47        .collect();
48    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
49}
50
51fn bit_length_impl<P: ArrowPrimitiveType>(
52    offsets: &OffsetBuffer<P::Native>,
53    nulls: Option<&NullBuffer>,
54) -> ArrayRef {
55    let bits = P::Native::usize_as(8);
56    let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
57    let v: Vec<_> = offsets.windows(2).map(c).collect();
58    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
59}
60
61/// Returns an array of Int32/Int64 denoting the length of each value in the array.
62///
63/// For list array, length is the number of elements in each list.
64/// For string array and binary array, length is the number of bytes of each value.
65///
66/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, FixedSizeListArray,
67///   and ListViewArray/LargeListViewArray, or DictionaryArray with above Arrays as values, or
68///   RunEndEncoded arrays with above arrays as values
69/// * length of null is null.
70pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
71    if let Some(d) = array.as_any_dictionary_opt() {
72        let lengths = length(d.values().as_ref())?;
73        return Ok(d.with_values(lengths));
74    }
75    match array.data_type() {
76        DataType::List(_) => {
77            let list = array.as_list::<i32>();
78            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
79        }
80        DataType::LargeList(_) => {
81            let list = array.as_list::<i64>();
82            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
83        }
84        DataType::ListView(_) => {
85            let list = array.as_list_view::<i32>();
86            Ok(Arc::new(Int32Array::new(
87                list.sizes().clone(),
88                list.nulls().cloned(),
89            )))
90        }
91        DataType::LargeListView(_) => {
92            let list = array.as_list_view::<i64>();
93            Ok(Arc::new(Int64Array::new(
94                list.sizes().clone(),
95                list.nulls().cloned(),
96            )))
97        }
98        DataType::Utf8 => {
99            let list = array.as_string::<i32>();
100            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
101        }
102        DataType::LargeUtf8 => {
103            let list = array.as_string::<i64>();
104            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
105        }
106        DataType::Utf8View => {
107            let list = array.as_string_view();
108            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
109            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
110                v.into(),
111                list.nulls().cloned(),
112            )?))
113        }
114        DataType::Binary => {
115            let list = array.as_binary::<i32>();
116            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
117        }
118        DataType::LargeBinary => {
119            let list = array.as_binary::<i64>();
120            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
121        }
122        DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
123            Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
124        )),
125        DataType::BinaryView => {
126            let list = array.as_binary_view();
127            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
128            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
129                v.into(),
130                list.nulls().cloned(),
131            )?))
132        }
133        DataType::RunEndEncoded(k, v) => match k.data_type() {
134            DataType::Int16 => ree_length!(array, Int16Type, &k, &v),
135            DataType::Int32 => ree_length!(array, Int32Type, &k, &v),
136            DataType::Int64 => ree_length!(array, Int64Type, &k, &v),
137            _ => Err(ArrowError::InvalidArgumentError(format!(
138                "Invalid run-end type: {:?}",
139                k.data_type()
140            ))),
141        },
142        other => Err(ArrowError::ComputeError(format!(
143            "length not supported for {other:?}"
144        ))),
145    }
146}
147
148/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
149///
150/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, StringViewArray/Utf8View,
151///   BinaryArray, LargeBinaryArray, BinaryViewArray, and FixedSizeBinaryArray,
152///   or DictionaryArray with above Arrays as values
153/// * bit_length of null is null.
154/// * bit_length is in number of bits
155pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
156    if let Some(d) = array.as_any_dictionary_opt() {
157        let lengths = bit_length(d.values().as_ref())?;
158        return Ok(d.with_values(lengths));
159    }
160
161    match array.data_type() {
162        DataType::Utf8 => {
163            let list = array.as_string::<i32>();
164            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
165        }
166        DataType::LargeUtf8 => {
167            let list = array.as_string::<i64>();
168            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
169        }
170        DataType::Utf8View => {
171            let list = array.as_string_view();
172            let values = list
173                .views()
174                .iter()
175                .map(|view| (*view as i32).wrapping_mul(8))
176                .collect();
177            Ok(Arc::new(Int32Array::try_new(
178                values,
179                array.nulls().cloned(),
180            )?))
181        }
182        DataType::Binary => {
183            let list = array.as_binary::<i32>();
184            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
185        }
186        DataType::LargeBinary => {
187            let list = array.as_binary::<i64>();
188            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
189        }
190        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
191            vec![*len * 8; array.len()].into(),
192            array.nulls().cloned(),
193        )?)),
194        DataType::BinaryView => {
195            let list = array.as_binary_view();
196            let values = list
197                .views()
198                .iter()
199                .map(|view| (*view as i32).wrapping_mul(8))
200                .collect();
201            Ok(Arc::new(Int32Array::try_new(
202                values,
203                array.nulls().cloned(),
204            )?))
205        }
206        other => Err(ArrowError::ComputeError(format!(
207            "bit_length not supported for {other:?}"
208        ))),
209    }
210}
211
212#[cfg(test)]
213mod tests {
214    use super::*;
215    use arrow_buffer::{Buffer, ScalarBuffer};
216    use arrow_data::ArrayData;
217    use arrow_schema::Field;
218
219    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
220        // a large array
221        let values = [
222            "one",
223            "on",
224            "o",
225            "",
226            "this is a longer string to test string array with",
227        ];
228        let values = values.into_iter().cycle().take(4096).collect();
229        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();
230
231        vec![
232            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
233            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
234            (vec!["💖"], 1, vec![4]),
235            (values, 4096, expected),
236        ]
237    }
238
239    macro_rules! length_binary_helper {
240        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
241            let array = GenericBinaryArray::<$offset_ty>::from($value);
242            let result = $kernel(&array).unwrap();
243            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
244            let expected: $result_ty = $expected.into();
245            assert_eq!(&expected, result);
246        }};
247    }
248
249    macro_rules! length_list_helper {
250        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
251            let array =
252                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
253            let result = length(&array).unwrap();
254            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
255            let expected: $result_ty = $expected.into();
256            assert_eq!(&expected, result);
257        }};
258    }
259
260    #[test]
261    fn length_test_string() {
262        length_cases_string()
263            .into_iter()
264            .for_each(|(input, len, expected)| {
265                let array = StringArray::from(input);
266                let result = length(&array).unwrap();
267                assert_eq!(len, result.len());
268                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
269                expected.iter().enumerate().for_each(|(i, value)| {
270                    assert_eq!(*value, result.value(i));
271                });
272            })
273    }
274
275    #[test]
276    fn length_test_large_string() {
277        length_cases_string()
278            .into_iter()
279            .for_each(|(input, len, expected)| {
280                let array = LargeStringArray::from(input);
281                let result = length(&array).unwrap();
282                assert_eq!(len, result.len());
283                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
284                expected.iter().enumerate().for_each(|(i, value)| {
285                    assert_eq!(*value as i64, result.value(i));
286                });
287            })
288    }
289
290    #[test]
291    fn length_test_string_view() {
292        length_cases_string()
293            .into_iter()
294            .for_each(|(input, len, expected)| {
295                let array = StringViewArray::from(input);
296                let result = length(&array).unwrap();
297                assert_eq!(len, result.len());
298                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
299                expected.iter().enumerate().for_each(|(i, value)| {
300                    assert_eq!(*value, result.value(i));
301                });
302            })
303    }
304
305    #[test]
306    fn length_test_binary() {
307        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
308        let result: Vec<i32> = vec![4, 3, 2];
309        length_binary_helper!(i32, Int32Array, length, value, result)
310    }
311
312    #[test]
313    fn length_test_large_binary() {
314        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
315        let result: Vec<i64> = vec![4, 2, 3];
316        length_binary_helper!(i64, Int64Array, length, value, result)
317    }
318
319    #[test]
320    fn length_test_binary_view() {
321        let value: Vec<&[u8]> = vec![
322            b"zero",
323            &[0xff, 0xf8],
324            b"two",
325            b"this is a longer string to test binary array with",
326        ];
327        let expected: Vec<i32> = vec![4, 2, 3, 49];
328
329        let array = BinaryViewArray::from(value);
330        let result = length(&array).unwrap();
331        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
332        let expected: Int32Array = expected.into();
333        assert_eq!(&expected, result);
334    }
335
336    #[test]
337    fn length_test_list() {
338        let value = vec![
339            Some(vec![]),
340            Some(vec![Some(1), Some(2), Some(4)]),
341            Some(vec![Some(0)]),
342        ];
343        let result: Vec<i32> = vec![0, 3, 1];
344        length_list_helper!(i32, Int32Array, Int32Type, value, result)
345    }
346
347    #[test]
348    fn length_test_large_list() {
349        let value = vec![
350            Some(vec![]),
351            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
352            Some(vec![None]),
353        ];
354        let result: Vec<i64> = vec![0, 3, 1];
355        length_list_helper!(i64, Int64Array, Float32Type, value, result)
356    }
357
    /// Optional static string slice, used as the element type of the nullable string fixtures.
    type OptionStr = Option<&'static str>;
359
360    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
361        vec![(
362            vec![Some("one"), None, Some("three"), Some("four")],
363            4,
364            vec![Some(3), None, Some(5), Some(4)],
365        )]
366    }
367
368    #[test]
369    fn length_null_string() {
370        length_null_cases_string()
371            .into_iter()
372            .for_each(|(input, len, expected)| {
373                let array = StringArray::from(input);
374                let result = length(&array).unwrap();
375                assert_eq!(len, result.len());
376                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
377
378                let expected: Int32Array = expected.into();
379                assert_eq!(&expected, result);
380            })
381    }
382
383    #[test]
384    fn length_null_large_string() {
385        length_null_cases_string()
386            .into_iter()
387            .for_each(|(input, len, expected)| {
388                let array = LargeStringArray::from(input);
389                let result = length(&array).unwrap();
390                assert_eq!(len, result.len());
391                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
392
393                // convert to i64
394                let expected: Int64Array = expected
395                    .iter()
396                    .map(|e| e.map(|e| e as i64))
397                    .collect::<Vec<_>>()
398                    .into();
399                assert_eq!(&expected, result);
400            })
401    }
402
403    #[test]
404    fn length_null_binary() {
405        let value: Vec<Option<&[u8]>> =
406            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
407        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
408        length_binary_helper!(i32, Int32Array, length, value, result)
409    }
410
411    #[test]
412    fn length_null_large_binary() {
413        let value: Vec<Option<&[u8]>> =
414            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
415        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
416        length_binary_helper!(i64, Int64Array, length, value, result)
417    }
418
419    #[test]
420    fn length_null_list() {
421        let value = vec![
422            Some(vec![]),
423            None,
424            Some(vec![Some(1), None, Some(2), Some(4)]),
425            Some(vec![Some(0)]),
426        ];
427        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
428        length_list_helper!(i32, Int32Array, Int8Type, value, result)
429    }
430
431    #[test]
432    fn length_null_large_list() {
433        let value = vec![
434            Some(vec![]),
435            None,
436            Some(vec![Some(1.1), None, Some(4.0)]),
437            Some(vec![Some(0.1)]),
438        ];
439        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
440        length_list_helper!(i64, Int64Array, Float32Type, value, result)
441    }
442
443    #[test]
444    fn length_test_list_view() {
445        // Create a ListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
446        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
447        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
448        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
449        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
450        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
451
452        let result = length(&list_array).unwrap();
453        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
454        let expected: Int32Array = vec![3, 3, 2].into();
455        assert_eq!(&expected, result);
456    }
457
458    #[test]
459    fn length_test_large_list_view() {
460        // Create a LargeListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
461        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
462        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
463        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
464        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
465        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
466
467        let result = length(&list_array).unwrap();
468        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
469        let expected: Int64Array = vec![3i64, 3, 2].into();
470        assert_eq!(&expected, result);
471    }
472
473    #[test]
474    fn length_null_list_view() {
475        // Create a ListViewArray with nulls: [], null, [1, 2, 3, 4], [0]
476        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
477        let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
478        let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
479        let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
480        let nulls = NullBuffer::from(vec![true, false, true, true]);
481        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
482
483        let result = length(&list_array).unwrap();
484        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
485        let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
486        assert_eq!(&expected, result);
487    }
488
489    #[test]
490    fn length_null_large_list_view() {
491        // Create a LargeListViewArray with nulls: [], null, [1.0, 2.0, 3.0], [0.1]
492        let field = Arc::new(Field::new_list_field(DataType::Float32, true));
493        let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
494        let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
495        let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
496        let nulls = NullBuffer::from(vec![true, false, true, true]);
497        let list_array =
498            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
499
500        let result = length(&list_array).unwrap();
501        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
502        let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
503        assert_eq!(&expected, result);
504    }
505
506    /// Tests that length is not valid for u64.
507    #[test]
508    fn length_wrong_type() {
509        let array: UInt64Array = vec![1u64].into();
510
511        assert!(length(&array).is_err());
512    }
513
514    /// Tests with an offset
515    #[test]
516    fn length_offsets_string() {
517        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
518        let b = a.slice(1, 3);
519        let result = length(&b).unwrap();
520        let result: &Int32Array = result.as_primitive();
521
522        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
523        assert_eq!(&expected, result);
524    }
525
526    #[test]
527    fn length_offsets_binary() {
528        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
529        let a = BinaryArray::from(value);
530        let b = a.slice(1, 3);
531        let result = length(&b).unwrap();
532        let result: &Int32Array = result.as_primitive();
533
534        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
535        assert_eq!(&expected, result);
536    }
537
538    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
539        // a large array
540        let values = ["one", "on", "o", ""];
541        let values = values.into_iter().cycle().take(4096).collect();
542        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();
543
544        vec![
545            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
546            (vec!["💖"], 1, vec![32]),
547            (vec!["josé"], 1, vec![40]),
548            (values, 4096, expected),
549        ]
550    }
551
552    #[test]
553    fn bit_length_test_string() {
554        bit_length_cases()
555            .into_iter()
556            .for_each(|(input, len, expected)| {
557                let array = StringArray::from(input);
558                let result = bit_length(&array).unwrap();
559                assert_eq!(len, result.len());
560                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
561                expected.iter().enumerate().for_each(|(i, value)| {
562                    assert_eq!(*value, result.value(i));
563                });
564            })
565    }
566
567    #[test]
568    fn bit_length_test_large_string() {
569        bit_length_cases()
570            .into_iter()
571            .for_each(|(input, len, expected)| {
572                let array = LargeStringArray::from(input);
573                let result = bit_length(&array).unwrap();
574                assert_eq!(len, result.len());
575                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
576                expected.iter().enumerate().for_each(|(i, value)| {
577                    assert_eq!(*value as i64, result.value(i));
578                });
579            })
580    }
581
582    #[test]
583    fn bit_length_test_utf8view() {
584        bit_length_cases()
585            .into_iter()
586            .for_each(|(input, len, expected)| {
587                let string_array = StringViewArray::from(input);
588                let result = bit_length(&string_array).unwrap();
589                assert_eq!(len, result.len());
590                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
591                expected.iter().enumerate().for_each(|(i, value)| {
592                    assert_eq!(*value, result.value(i));
593                });
594            })
595    }
596
597    #[test]
598    fn bit_length_null_utf8view() {
599        bit_length_null_cases()
600            .into_iter()
601            .for_each(|(input, len, expected)| {
602                let array = StringArray::from(input);
603                let result = bit_length(&array).unwrap();
604                assert_eq!(len, result.len());
605                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
606
607                let expected: Int32Array = expected.into();
608                assert_eq!(&expected, result);
609            })
610    }
611    #[test]
612    fn bit_length_binary() {
613        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
614        let expected: Vec<i32> = vec![24, 16, 40];
615        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
616    }
617
618    #[test]
619    fn bit_length_large_binary() {
620        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
621        let expected: Vec<i64> = vec![32, 8, 16];
622        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
623    }
624
625    #[test]
626    fn bit_length_binary_view() {
627        let value: Vec<&[u8]> = vec![
628            b"zero",
629            &[0xff, 0xf8],
630            b"two",
631            b"this is a longer string to test binary array with",
632        ];
633        let expected: Vec<i32> = vec![32, 16, 24, 392];
634
635        let array = BinaryViewArray::from(value);
636        let result = bit_length(&array).unwrap();
637        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
638        let expected: Int32Array = expected.into();
639        assert_eq!(&expected, result);
640    }
641
642    #[test]
643    fn bit_length_null_binary_view() {
644        let value: Vec<Option<&[u8]>> =
645            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
646        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
647
648        let array = BinaryViewArray::from(value);
649        let result = bit_length(&array).unwrap();
650        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
651        let expected: Int32Array = expected.into();
652        assert_eq!(&expected, result);
653    }
654
655    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
656        vec![(
657            vec![Some("one"), None, Some("three"), Some("four")],
658            4,
659            vec![Some(24), None, Some(40), Some(32)],
660        )]
661    }
662
663    #[test]
664    fn bit_length_null_string() {
665        bit_length_null_cases()
666            .into_iter()
667            .for_each(|(input, len, expected)| {
668                let array = StringArray::from(input);
669                let result = bit_length(&array).unwrap();
670                assert_eq!(len, result.len());
671                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
672
673                let expected: Int32Array = expected.into();
674                assert_eq!(&expected, result);
675            })
676    }
677
678    #[test]
679    fn bit_length_null_large_string() {
680        bit_length_null_cases()
681            .into_iter()
682            .for_each(|(input, len, expected)| {
683                let array = LargeStringArray::from(input);
684                let result = bit_length(&array).unwrap();
685                assert_eq!(len, result.len());
686                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
687
688                // convert to i64
689                let expected: Int64Array = expected
690                    .iter()
691                    .map(|e| e.map(|e| e as i64))
692                    .collect::<Vec<_>>()
693                    .into();
694                assert_eq!(&expected, result);
695            })
696    }
697
698    #[test]
699    fn bit_length_null_binary() {
700        let value: Vec<Option<&[u8]>> =
701            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
702        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
703        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
704    }
705
706    #[test]
707    fn bit_length_null_large_binary() {
708        let value: Vec<Option<&[u8]>> =
709            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
710        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
711        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
712    }
713
714    /// Tests that bit_length is not valid for u64.
715    #[test]
716    fn bit_length_wrong_type() {
717        let array: UInt64Array = vec![1u64].into();
718
719        assert!(bit_length(&array).is_err());
720    }
721
722    /// Tests with an offset
723    #[test]
724    fn bit_length_offsets_string() {
725        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
726        let b = a.slice(1, 3);
727        let result = bit_length(&b).unwrap();
728        let result: &Int32Array = result.as_primitive();
729
730        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
731        assert_eq!(&expected, result);
732    }
733
734    #[test]
735    fn bit_length_offsets_binary() {
736        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
737        let a = BinaryArray::from(value);
738        let b = a.slice(1, 3);
739        let result = bit_length(&b).unwrap();
740        let result: &Int32Array = result.as_primitive();
741
742        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
743        assert_eq!(&expected, result);
744    }
745
746    #[test]
747    fn length_dictionary() {
748        _length_dictionary::<Int8Type>();
749        _length_dictionary::<Int16Type>();
750        _length_dictionary::<Int32Type>();
751        _length_dictionary::<Int64Type>();
752        _length_dictionary::<UInt8Type>();
753        _length_dictionary::<UInt16Type>();
754        _length_dictionary::<UInt32Type>();
755        _length_dictionary::<UInt64Type>();
756    }
757
758    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
759        const TOTAL: i32 = 100;
760
761        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
762        let data: Vec<Option<&str>> = (0..TOTAL)
763            .map(|n| {
764                let i = n % 5;
765                if i == 3 { None } else { Some(v[i as usize]) }
766            })
767            .collect();
768
769        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
770
771        let expected: Vec<Option<i32>> =
772            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();
773
774        let res = length(&dict_array).unwrap();
775        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
776        let actual: Vec<Option<i32>> = actual
777            .values()
778            .as_any()
779            .downcast_ref::<Int32Array>()
780            .unwrap()
781            .take_iter(dict_array.keys_iter())
782            .collect();
783
784        for i in 0..TOTAL as usize {
785            assert_eq!(expected[i], actual[i],);
786        }
787    }
788
/// Runs the generic dictionary `bit_length` check for each integer key type listed below.
#[test]
fn bit_length_dictionary() {
    // A type parameter cannot be iterated at runtime, so a local macro expands
    // to one call of the generic helper per key type, in the order given.
    macro_rules! for_each_key_type {
        ($($key:ty),+ $(,)?) => {
            $( _bit_length_dictionary::<$key>(); )+
        };
    }

    for_each_key_type!(
        Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
    );
}
800
801    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
802        const TOTAL: i32 = 100;
803
804        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
805        let data: Vec<Option<&str>> = (0..TOTAL)
806            .map(|n| {
807                let i = n % 5;
808                if i == 3 { None } else { Some(v[i as usize]) }
809            })
810            .collect();
811
812        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
813
814        let expected: Vec<Option<i32>> = data
815            .iter()
816            .map(|opt| opt.map(|s| (s.chars().count() * 8) as i32))
817            .collect();
818
819        let res = bit_length(&dict_array).unwrap();
820        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
821        let actual: Vec<Option<i32>> = actual
822            .values()
823            .as_any()
824            .downcast_ref::<Int32Array>()
825            .unwrap()
826            .take_iter(dict_array.keys_iter())
827            .collect();
828
829        for i in 0..TOTAL as usize {
830            assert_eq!(expected[i], actual[i],);
831        }
832    }
833
/// `length` on a fixed-size list array of size 3 with a null middle slot:
/// expects 3 for the valid slots and null for the null slot.
#[test]
fn test_fixed_size_list_length() {
    const LIST_SIZE: i32 = 3;

    // Child values: nine Int32 entries backing three lists of three elements.
    let child = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
        .build()
        .unwrap();

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    // Only the middle list is marked null.
    let validity = NullBuffer::from(vec![true, false, true]);
    let list_array = FixedSizeListArray::from(
        ArrayData::builder(DataType::FixedSizeList(item_field, LIST_SIZE))
            .len(3)
            .add_child_data(child)
            .nulls(Some(validity))
            .build()
            .unwrap(),
    );

    let result = length(&list_array).unwrap();
    let lengths = result.as_primitive::<Int32Type>();

    assert_eq!(lengths.len(), 3);
    assert_eq!(lengths.value(0), LIST_SIZE);
    assert!(lengths.is_null(1));
    assert_eq!(lengths.value(2), LIST_SIZE);
}
861
/// `length` and `bit_length` on a fixed-size binary array built from 16 zero
/// bytes with a value width of 4: expects four entries of 4 and 32 respectively.
#[test]
fn test_fixed_size_binary() {
    let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);

    // Byte length of every value equals the fixed width.
    let expected_bytes = Int32Array::from(vec![4; 4]);
    let byte_lengths = length(&array).unwrap();
    assert_eq!(byte_lengths.as_ref(), &expected_bytes);

    // Bit length is eight times the fixed width.
    let expected_bits = Int32Array::from(vec![32; 4]);
    let bit_lengths = bit_length(&array).unwrap();
    assert_eq!(bit_lengths.as_ref(), &expected_bits);
}
/// `length` on a run-end-encoded array of strings: the result must be a
/// `RunArray<Int32Type>` whose values are the Int32 lengths of the input values.
#[test]
fn length_test_ree_string_values() {
    use arrow_array::{RunArray, types::Int32Type};

    let values = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![2i32, 5, 9, 11, 14]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let output = length(&ree_array).unwrap();
    // Both downcasts panic (failing the test) if the output has another type.
    let output_ree = output.as_run_opt::<Int32Type>().unwrap();
    let output_values = output_ree
        .values()
        .as_primitive_opt::<Int32Type>()
        .unwrap();

    let expected = Int32Array::from(vec![5, 3, 4, 5, 1]);
    assert_eq!(&expected, output_values);
}
/// `length` on a run-end-encoded array whose values are `UInt64` must return an error.
#[test]
fn length_test_ree_invalid_type_early_fail() {
    use arrow_array::{RunArray, types::Int32Type};

    let values = UInt64Array::from(vec![1u64, 2, 3]);
    let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let outcome = length(&ree_array);
    assert!(outcome.is_err());
}
906}