datafusion_functions_nested/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_length function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, Int64Array, LargeListArray, ListArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25    DataType,
26    DataType::{FixedSizeList, LargeList, List, UInt64},
27};
28use datafusion_common::cast::{as_generic_list_array, as_int64_array};
29use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result};
30use datafusion_expr::{
31    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
32};
33use datafusion_functions::{downcast_arg, downcast_named_arg};
34use datafusion_macros::user_doc;
35use std::any::Any;
36use std::sync::Arc;
37
// Invokes the crate-level `make_udf_expr_and_func!` macro (defined outside
// this file) for the `ArrayLength` UDF.
// NOTE(review): the argument roles are inferred from their names and order —
// UDF type, expression-function name, argument name, doc string, and UDF
// accessor name; confirm against the macro definition.
38make_udf_expr_and_func!(
39    ArrayLength,
40    array_length,
41    array,
42    "returns the length of the array dimension.",
43    array_length_udf
44);
45
/// Scalar UDF type for the SQL `array_length` function, also registered under
/// the alias `list_length`.
///
/// The `#[user_doc]` attribute below carries the user-facing description,
/// syntax, SQL example and argument documentation.
46#[user_doc(
47    doc_section(label = "Array Functions"),
48    description = "Returns the length of the array dimension.",
49    syntax_example = "array_length(array, dimension)",
50    sql_example = r#"```sql
51> select array_length([1, 2, 3, 4, 5], 1);
52+-------------------------------------------+
53| array_length(List([1,2,3,4,5]), 1)        |
54+-------------------------------------------+
55| 5                                         |
56+-------------------------------------------+
57```"#,
58    argument(
59        name = "array",
60        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
61    ),
62    argument(name = "dimension", description = "Array dimension.")
63)]
64#[derive(Debug)]
65pub struct ArrayLength {
    /// Signature handed out by `ScalarUDFImpl::signature`.
66    signature: Signature,
    /// Alternative function names handed out by `ScalarUDFImpl::aliases`.
67    aliases: Vec<String>,
68}
69
70impl Default for ArrayLength {
71    fn default() -> Self {
72        Self::new()
73    }
74}
75
76impl ArrayLength {
77    pub fn new() -> Self {
78        Self {
79            signature: Signature::variadic_any(Volatility::Immutable),
80            aliases: vec![String::from("list_length")],
81        }
82    }
83}
84
85impl ScalarUDFImpl for ArrayLength {
86    fn as_any(&self) -> &dyn Any {
87        self
88    }
89    fn name(&self) -> &str {
90        "array_length"
91    }
92
93    fn signature(&self) -> &Signature {
94        &self.signature
95    }
96
97    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
98        Ok(match arg_types[0] {
99            List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
100            _ => {
101                return plan_err!("The array_length function can only accept List/LargeList/FixedSizeList.");
102            }
103        })
104    }
105
106    fn invoke_with_args(
107        &self,
108        args: datafusion_expr::ScalarFunctionArgs,
109    ) -> Result<ColumnarValue> {
110        make_scalar_function(array_length_inner)(&args.args)
111    }
112
113    fn aliases(&self) -> &[String] {
114        &self.aliases
115    }
116
117    fn documentation(&self) -> Option<&Documentation> {
118        self.doc()
119    }
120}
121
122/// Array_length SQL function
123pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
124    if args.len() != 1 && args.len() != 2 {
125        return exec_err!("array_length expects one or two arguments");
126    }
127
128    match &args[0].data_type() {
129        List(_) => general_array_length::<i32>(args),
130        LargeList(_) => general_array_length::<i64>(args),
131        array_type => exec_err!("array_length does not support type '{array_type:?}'"),
132    }
133}
134
135/// Dispatch array length computation based on the offset type.
136fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
137    let list_array = as_generic_list_array::<O>(&array[0])?;
138    let dimension = if array.len() == 2 {
139        as_int64_array(&array[1])?.clone()
140    } else {
141        Int64Array::from_value(1, list_array.len())
142    };
143
144    let result = list_array
145        .iter()
146        .zip(dimension.iter())
147        .map(|(arr, dim)| compute_array_length(arr, dim))
148        .collect::<Result<UInt64Array>>()?;
149
150    Ok(Arc::new(result) as ArrayRef)
151}
152
153/// Returns the length of a concrete array dimension
154fn compute_array_length(
155    arr: Option<ArrayRef>,
156    dimension: Option<i64>,
157) -> Result<Option<u64>> {
158    let mut current_dimension: i64 = 1;
159    let mut value = match arr {
160        Some(arr) => arr,
161        None => return Ok(None),
162    };
163    let dimension = match dimension {
164        Some(value) => {
165            if value < 1 {
166                return Ok(None);
167            }
168
169            value
170        }
171        None => return Ok(None),
172    };
173
174    loop {
175        if current_dimension == dimension {
176            return Ok(Some(value.len() as u64));
177        }
178
179        match value.data_type() {
180            List(..) => {
181                value = downcast_arg!(value, ListArray).value(0);
182                current_dimension += 1;
183            }
184            LargeList(..) => {
185                value = downcast_arg!(value, LargeListArray).value(0);
186                current_dimension += 1;
187            }
188            _ => return Ok(None),
189        }
190    }
191}