datafusion_functions_nested/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_length function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, FixedSizeListArray, Int64Array, LargeListArray, ListArray,
23    OffsetSizeTrait, UInt64Array,
24};
25use arrow::datatypes::{
26    DataType,
27    DataType::{FixedSizeList, LargeList, List, UInt64},
28};
29use datafusion_common::cast::{
30    as_fixed_size_list_array, as_generic_list_array, as_int64_array,
31};
32use datafusion_common::{exec_err, Result};
33use datafusion_expr::{
34    ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
35    ScalarUDFImpl, Signature, TypeSignature, Volatility,
36};
37use datafusion_functions::downcast_arg;
38use datafusion_macros::user_doc;
39use std::any::Any;
40use std::sync::Arc;
41
// Wires `ArrayLength` into the crate through `make_udf_expr_and_func!`, which
// is defined outside this file. Judging by the argument names it presumably
// emits an `array_length(array)` expression helper documented with the string
// below, plus an `array_length_udf()` accessor -- confirm against the macro
// definition.
make_udf_expr_and_func!(
    ArrayLength,
    array_length,
    array,
    "returns the length of the array dimension.",
    array_length_udf
);
49
// `#[user_doc]` carries the user-facing reference text for `array_length`:
// section, description, syntax, a SQL example and the argument list. It
// presumably also generates the `doc()` method that `documentation()` calls --
// confirm against `datafusion_macros::user_doc`.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns the length of the array dimension.",
    syntax_example = "array_length(array, dimension)",
    sql_example = r#"```sql
> select array_length([1, 2, 3, 4, 5], 1);
+-------------------------------------------+
| array_length(List([1,2,3,4,5]), 1)        |
+-------------------------------------------+
| 5                                         |
+-------------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    ),
    argument(name = "dimension", description = "Array dimension.")
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ArrayLength {
    // Accepted argument lists; handed out by `ScalarUDFImpl::signature`.
    signature: Signature,
    // Alternative SQL names; handed out by `ScalarUDFImpl::aliases`.
    aliases: Vec<String>,
}
73
74impl Default for ArrayLength {
75    fn default() -> Self {
76        Self::new()
77    }
78}
79
80impl ArrayLength {
81    pub fn new() -> Self {
82        Self {
83            signature: Signature::one_of(
84                vec![
85                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
86                        arguments: vec![ArrayFunctionArgument::Array],
87                        array_coercion: None,
88                    }),
89                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
90                        arguments: vec![
91                            ArrayFunctionArgument::Array,
92                            ArrayFunctionArgument::Index,
93                        ],
94                        array_coercion: None,
95                    }),
96                ],
97                Volatility::Immutable,
98            ),
99            aliases: vec![String::from("list_length")],
100        }
101    }
102}
103
104impl ScalarUDFImpl for ArrayLength {
105    fn as_any(&self) -> &dyn Any {
106        self
107    }
108    fn name(&self) -> &str {
109        "array_length"
110    }
111
112    fn signature(&self) -> &Signature {
113        &self.signature
114    }
115
116    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
117        Ok(UInt64)
118    }
119
120    fn invoke_with_args(
121        &self,
122        args: datafusion_expr::ScalarFunctionArgs,
123    ) -> Result<ColumnarValue> {
124        make_scalar_function(array_length_inner)(&args.args)
125    }
126
127    fn aliases(&self) -> &[String] {
128        &self.aliases
129    }
130
131    fn documentation(&self) -> Option<&Documentation> {
132        self.doc()
133    }
134}
135
/// Shared body of the per-list-type `array_length` kernels.
///
/// `$array` is the already-downcast list array and `$dimension` is the
/// optional dimension argument (the call sites pass `args.get(1)`). Each row
/// is paired with its dimension and measured by `compute_array_length`; the
/// results are collected into a `UInt64Array`. Must be expanded inside a
/// function returning `Result<ArrayRef>` because it uses `?`.
macro_rules! array_length_impl {
    ($array:expr, $dimension:expr) => {{
        let lists = $array;
        let dimensions = if let Some(dimension_arg) = $dimension {
            as_int64_array(dimension_arg)?.clone()
        } else {
            // No dimension argument was given: use dimension 1 for every row.
            Int64Array::from_value(1, lists.len())
        };

        let lengths = lists
            .iter()
            .zip(dimensions.iter())
            .map(|(row, dim)| compute_array_length(row, dim))
            .collect::<Result<UInt64Array>>()?;
        let lengths: ArrayRef = Arc::new(lengths);

        Ok(lengths)
    }};
}
152
153/// Array_length SQL function
154pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
155    if args.len() != 1 && args.len() != 2 {
156        return exec_err!("array_length expects one or two arguments");
157    }
158
159    match &args[0].data_type() {
160        List(_) => general_array_length::<i32>(args),
161        LargeList(_) => general_array_length::<i64>(args),
162        FixedSizeList(_, _) => fixed_size_array_length(args),
163        array_type => exec_err!("array_length does not support type '{array_type}'"),
164    }
165}
166
167fn fixed_size_array_length(array: &[ArrayRef]) -> Result<ArrayRef> {
168    array_length_impl!(as_fixed_size_list_array(&array[0])?, array.get(1))
169}
170
171/// Dispatch array length computation based on the offset type.
172fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
173    array_length_impl!(as_generic_list_array::<O>(&array[0])?, array.get(1))
174}
175
176/// Returns the length of a concrete array dimension
177fn compute_array_length(
178    arr: Option<ArrayRef>,
179    dimension: Option<i64>,
180) -> Result<Option<u64>> {
181    let mut current_dimension: i64 = 1;
182    let mut value = match arr {
183        Some(arr) => arr,
184        None => return Ok(None),
185    };
186    let dimension = match dimension {
187        Some(value) => {
188            if value < 1 {
189                return Ok(None);
190            }
191
192            value
193        }
194        None => return Ok(None),
195    };
196
197    loop {
198        if current_dimension == dimension {
199            return Ok(Some(value.len() as u64));
200        }
201
202        match value.data_type() {
203            List(..) => {
204                value = downcast_arg!(value, ListArray).value(0);
205                current_dimension += 1;
206            }
207            LargeList(..) => {
208                value = downcast_arg!(value, LargeListArray).value(0);
209                current_dimension += 1;
210            }
211            FixedSizeList(_, _) => {
212                value = downcast_arg!(value, FixedSizeListArray).value(0);
213                current_dimension += 1;
214            }
215            _ => return Ok(None),
216        }
217    }
218}