datafusion_functions_nested/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_length function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, FixedSizeListArray, Int64Array, LargeListArray, ListArray,
23    OffsetSizeTrait, UInt64Array,
24};
25use arrow::datatypes::{
26    DataType,
27    DataType::{FixedSizeList, LargeList, List, UInt64},
28};
29use datafusion_common::cast::{
30    as_fixed_size_list_array, as_generic_list_array, as_int64_array,
31};
32use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result};
33use datafusion_expr::{
34    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
35};
36use datafusion_functions::{downcast_arg, downcast_named_arg};
37use datafusion_macros::user_doc;
38use std::any::Any;
39use std::sync::Arc;
40
41make_udf_expr_and_func!(
42    ArrayLength,
43    array_length,
44    array,
45    "returns the length of the array dimension.",
46    array_length_udf
47);
48
49#[user_doc(
50    doc_section(label = "Array Functions"),
51    description = "Returns the length of the array dimension.",
52    syntax_example = "array_length(array, dimension)",
53    sql_example = r#"```sql
54> select array_length([1, 2, 3, 4, 5], 1);
55+-------------------------------------------+
56| array_length(List([1,2,3,4,5]), 1)        |
57+-------------------------------------------+
58| 5                                         |
59+-------------------------------------------+
60```"#,
61    argument(
62        name = "array",
63        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
64    ),
65    argument(name = "dimension", description = "Array dimension.")
66)]
67#[derive(Debug)]
68pub struct ArrayLength {
69    signature: Signature,
70    aliases: Vec<String>,
71}
72
73impl Default for ArrayLength {
74    fn default() -> Self {
75        Self::new()
76    }
77}
78
79impl ArrayLength {
80    pub fn new() -> Self {
81        Self {
82            signature: Signature::variadic_any(Volatility::Immutable),
83            aliases: vec![String::from("list_length")],
84        }
85    }
86}
87
88impl ScalarUDFImpl for ArrayLength {
89    fn as_any(&self) -> &dyn Any {
90        self
91    }
92    fn name(&self) -> &str {
93        "array_length"
94    }
95
96    fn signature(&self) -> &Signature {
97        &self.signature
98    }
99
100    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
101        Ok(match arg_types[0] {
102            List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
103            _ => {
104                return plan_err!("The array_length function can only accept List/LargeList/FixedSizeList.");
105            }
106        })
107    }
108
109    fn invoke_with_args(
110        &self,
111        args: datafusion_expr::ScalarFunctionArgs,
112    ) -> Result<ColumnarValue> {
113        make_scalar_function(array_length_inner)(&args.args)
114    }
115
116    fn aliases(&self) -> &[String] {
117        &self.aliases
118    }
119
120    fn documentation(&self) -> Option<&Documentation> {
121        self.doc()
122    }
123}
124
/// Shared body of the typed `array_length` kernels.
///
/// `$array` is an already-downcast list array and `$dimension` is an
/// `Option` holding the optional dimension argument. Expands to a
/// `Result<ArrayRef>` wrapping a `UInt64Array` with one entry per row; uses
/// `?`, so it must be expanded inside a function returning `Result`.
macro_rules! array_length_impl {
    ($array:expr, $dimension:expr) => {{
        let lists = $array;
        // Without a dimension argument every row measures dimension 1.
        let dimensions = if let Some(dim_arg) = $dimension {
            as_int64_array(dim_arg)?.clone()
        } else {
            Int64Array::from_value(1, lists.len())
        };
        let lengths = lists
            .iter()
            .zip(dimensions.iter())
            .map(|(list, dim)| compute_array_length(list, dim))
            .collect::<Result<UInt64Array>>()?;

        Ok(Arc::new(lengths) as ArrayRef)
    }};
}
141
142/// Array_length SQL function
143pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
144    if args.len() != 1 && args.len() != 2 {
145        return exec_err!("array_length expects one or two arguments");
146    }
147
148    match &args[0].data_type() {
149        List(_) => general_array_length::<i32>(args),
150        LargeList(_) => general_array_length::<i64>(args),
151        FixedSizeList(_, _) => fixed_size_array_length(args),
152        array_type => exec_err!("array_length does not support type '{array_type:?}'"),
153    }
154}
155
156fn fixed_size_array_length(array: &[ArrayRef]) -> Result<ArrayRef> {
157    array_length_impl!(as_fixed_size_list_array(&array[0])?, array.get(1))
158}
159
160/// Dispatch array length computation based on the offset type.
161fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
162    array_length_impl!(as_generic_list_array::<O>(&array[0])?, array.get(1))
163}
164
165/// Returns the length of a concrete array dimension
166fn compute_array_length(
167    arr: Option<ArrayRef>,
168    dimension: Option<i64>,
169) -> Result<Option<u64>> {
170    let mut current_dimension: i64 = 1;
171    let mut value = match arr {
172        Some(arr) => arr,
173        None => return Ok(None),
174    };
175    let dimension = match dimension {
176        Some(value) => {
177            if value < 1 {
178                return Ok(None);
179            }
180
181            value
182        }
183        None => return Ok(None),
184    };
185
186    loop {
187        if current_dimension == dimension {
188            return Ok(Some(value.len() as u64));
189        }
190
191        match value.data_type() {
192            List(..) => {
193                value = downcast_arg!(value, ListArray).value(0);
194                current_dimension += 1;
195            }
196            LargeList(..) => {
197                value = downcast_arg!(value, LargeListArray).value(0);
198                current_dimension += 1;
199            }
200            FixedSizeList(_, _) => {
201                value = downcast_arg!(value, FixedSizeListArray).value(0);
202                current_dimension += 1;
203            }
204            _ => return Ok(None),
205        }
206    }
207}