Skip to main content

datafusion_functions_nested/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_length function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, FixedSizeListArray, Int64Array, LargeListArray, ListArray,
23    OffsetSizeTrait, UInt64Array,
24};
25use arrow::datatypes::{
26    DataType,
27    DataType::{FixedSizeList, LargeList, List, UInt64},
28};
29use datafusion_common::cast::{
30    as_fixed_size_list_array, as_generic_list_array, as_int64_array,
31};
32use datafusion_common::{Result, exec_err};
33use datafusion_expr::{
34    ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
35    ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
36};
37use datafusion_functions::downcast_arg;
38use datafusion_macros::user_doc;
39use std::sync::Arc;
40
41make_udf_expr_and_func!(
42    ArrayLength,
43    array_length,
44    array,
45    "returns the length of the array dimension.",
46    array_length_udf
47);
48
49#[user_doc(
50    doc_section(label = "Array Functions"),
51    description = "Returns the length of the array dimension.",
52    syntax_example = "array_length(array, dimension)",
53    sql_example = r#"```sql
54> select array_length([1, 2, 3, 4, 5], 1);
55+-------------------------------------------+
56| array_length(List([1,2,3,4,5]), 1)        |
57+-------------------------------------------+
58| 5                                         |
59+-------------------------------------------+
60```"#,
61    argument(
62        name = "array",
63        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
64    ),
65    argument(name = "dimension", description = "Array dimension.")
66)]
67#[derive(Debug, PartialEq, Eq, Hash)]
68pub struct ArrayLength {
69    signature: Signature,
70    aliases: Vec<String>,
71}
72
73impl Default for ArrayLength {
74    fn default() -> Self {
75        Self::new()
76    }
77}
78
79impl ArrayLength {
80    pub fn new() -> Self {
81        Self {
82            signature: Signature::one_of(
83                vec![
84                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
85                        arguments: vec![ArrayFunctionArgument::Array],
86                        array_coercion: None,
87                    }),
88                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
89                        arguments: vec![
90                            ArrayFunctionArgument::Array,
91                            ArrayFunctionArgument::Index,
92                        ],
93                        array_coercion: None,
94                    }),
95                ],
96                Volatility::Immutable,
97            ),
98            aliases: vec![String::from("list_length")],
99        }
100    }
101}
102
103impl ScalarUDFImpl for ArrayLength {
104    fn name(&self) -> &str {
105        "array_length"
106    }
107
108    fn signature(&self) -> &Signature {
109        &self.signature
110    }
111
112    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
113        Ok(UInt64)
114    }
115
116    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
117        make_scalar_function(array_length_inner)(&args.args)
118    }
119
120    fn aliases(&self) -> &[String] {
121        &self.aliases
122    }
123
124    fn documentation(&self) -> Option<&Documentation> {
125        self.doc()
126    }
127}
128
// Shared body for the list-typed entry points.
//
// `$array` is an already-downcast list array and `$dimension` an
// `Option<&ArrayRef>` holding the requested dimension per row. When no
// dimension argument is present, dimension 1 is used for every row. Expands to
// an expression of type `Result<ArrayRef>` and uses `?`, so it must be invoked
// inside a function returning `Result`.
macro_rules! array_length_impl {
    ($array:expr, $dimension:expr) => {{
        let lists = $array;
        let dimensions = if let Some(d) = $dimension {
            as_int64_array(d)?.clone()
        } else {
            Int64Array::from_value(1, lists.len())
        };

        // Pair each row with its dimension; the first error aborts the collect.
        let lengths: UInt64Array = lists
            .iter()
            .zip(dimensions.iter())
            .map(|(list, dim)| compute_array_length(list, dim))
            .collect::<Result<_>>()?;

        Ok(Arc::new(lengths) as ArrayRef)
    }};
}
145
146fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
147    if args.len() != 1 && args.len() != 2 {
148        return exec_err!("array_length expects one or two arguments");
149    }
150
151    match &args[0].data_type() {
152        List(_) => general_array_length::<i32>(args),
153        LargeList(_) => general_array_length::<i64>(args),
154        FixedSizeList(_, _) => fixed_size_array_length(args),
155        array_type => exec_err!("array_length does not support type '{array_type}'"),
156    }
157}
158
159fn fixed_size_array_length(array: &[ArrayRef]) -> Result<ArrayRef> {
160    array_length_impl!(as_fixed_size_list_array(&array[0])?, array.get(1))
161}
162
163/// Dispatch array length computation based on the offset type.
164fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
165    array_length_impl!(as_generic_list_array::<O>(&array[0])?, array.get(1))
166}
167
168/// Returns the length of a concrete array dimension
169fn compute_array_length(
170    arr: Option<ArrayRef>,
171    dimension: Option<i64>,
172) -> Result<Option<u64>> {
173    let mut current_dimension: i64 = 1;
174    let mut value = match arr {
175        Some(arr) => arr,
176        None => return Ok(None),
177    };
178    let dimension = match dimension {
179        Some(value) => {
180            if value < 1 {
181                return Ok(None);
182            }
183
184            value
185        }
186        None => return Ok(None),
187    };
188
189    loop {
190        if current_dimension == dimension {
191            return Ok(Some(value.len() as u64));
192        }
193
194        match value.data_type() {
195            List(..) => {
196                value = downcast_arg!(value, ListArray).value(0);
197                current_dimension += 1;
198            }
199            LargeList(..) => {
200                value = downcast_arg!(value, LargeListArray).value(0);
201                current_dimension += 1;
202            }
203            FixedSizeList(_, _) => {
204                value = downcast_arg!(value, FixedSizeListArray).value(0);
205                current_dimension += 1;
206            }
207            _ => return Ok(None),
208        }
209    }
210}