datafusion_functions_nested/
length.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, FixedSizeListArray, Int64Array, LargeListArray, ListArray,
23 OffsetSizeTrait, UInt64Array,
24};
25use arrow::datatypes::{
26 DataType,
27 DataType::{FixedSizeList, LargeList, List, UInt64},
28};
29use datafusion_common::cast::{
30 as_fixed_size_list_array, as_generic_list_array, as_int64_array,
31};
32use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result};
33use datafusion_expr::{
34 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
35};
36use datafusion_functions::{downcast_arg, downcast_named_arg};
37use datafusion_macros::user_doc;
38use std::any::Any;
39use std::sync::Arc;
40
// Invokes the `make_udf_expr_and_func!` macro for `ArrayLength`. The macro is
// defined outside this file; presumably it generates the `array_length`
// expression helper (taking a single `array` argument, documented by the
// string below) and the `array_length_udf` accessor — TODO confirm against
// the macro definition.
make_udf_expr_and_func!(
    ArrayLength,
    array_length,
    array,
    "returns the length of the array dimension.",
    array_length_udf
);
48
49#[user_doc(
50 doc_section(label = "Array Functions"),
51 description = "Returns the length of the array dimension.",
52 syntax_example = "array_length(array, dimension)",
53 sql_example = r#"```sql
54> select array_length([1, 2, 3, 4, 5], 1);
55+-------------------------------------------+
56| array_length(List([1,2,3,4,5]), 1) |
57+-------------------------------------------+
58| 5 |
59+-------------------------------------------+
60```"#,
61 argument(
62 name = "array",
63 description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
64 ),
65 argument(name = "dimension", description = "Array dimension.")
66)]
67#[derive(Debug)]
68pub struct ArrayLength {
69 signature: Signature,
70 aliases: Vec<String>,
71}
72
73impl Default for ArrayLength {
74 fn default() -> Self {
75 Self::new()
76 }
77}
78
79impl ArrayLength {
80 pub fn new() -> Self {
81 Self {
82 signature: Signature::variadic_any(Volatility::Immutable),
83 aliases: vec![String::from("list_length")],
84 }
85 }
86}
87
88impl ScalarUDFImpl for ArrayLength {
89 fn as_any(&self) -> &dyn Any {
90 self
91 }
92 fn name(&self) -> &str {
93 "array_length"
94 }
95
96 fn signature(&self) -> &Signature {
97 &self.signature
98 }
99
100 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
101 Ok(match arg_types[0] {
102 List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
103 _ => {
104 return plan_err!("The array_length function can only accept List/LargeList/FixedSizeList.");
105 }
106 })
107 }
108
109 fn invoke_with_args(
110 &self,
111 args: datafusion_expr::ScalarFunctionArgs,
112 ) -> Result<ColumnarValue> {
113 make_scalar_function(array_length_inner)(&args.args)
114 }
115
116 fn aliases(&self) -> &[String] {
117 &self.aliases
118 }
119
120 fn documentation(&self) -> Option<&Documentation> {
121 self.doc()
122 }
123}
124
/// Computes the per-row length of a list-like array.
///
/// * `$array` - expression yielding the list array; it must provide `len()`
///   and an `iter()` whose items are accepted by `compute_array_length`.
/// * `$dimension` - `Option` of the dimension argument. `Some` is read as an
///   `Int64Array`; `None` uses dimension 1 for every row.
///
/// Evaluates to `Result<ArrayRef>` wrapping a `UInt64Array`. The macro uses
/// `?`, so it must be expanded inside a function returning `Result`.
macro_rules! array_length_impl {
    ($array:expr, $dimension:expr) => {{
        let lists = $array;
        // No dimension argument means "first dimension" for every row.
        let dims: Int64Array = if let Some(dim_arg) = $dimension {
            as_int64_array(dim_arg)?.clone()
        } else {
            Int64Array::from_value(1, lists.len())
        };

        let lengths: UInt64Array = lists
            .iter()
            .zip(dims.iter())
            .map(|(row, dim)| compute_array_length(row, dim))
            .collect::<Result<_>>()?;

        Ok(Arc::new(lengths) as ArrayRef)
    }};
}
141
142pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
144 if args.len() != 1 && args.len() != 2 {
145 return exec_err!("array_length expects one or two arguments");
146 }
147
148 match &args[0].data_type() {
149 List(_) => general_array_length::<i32>(args),
150 LargeList(_) => general_array_length::<i64>(args),
151 FixedSizeList(_, _) => fixed_size_array_length(args),
152 array_type => exec_err!("array_length does not support type '{array_type:?}'"),
153 }
154}
155
156fn fixed_size_array_length(array: &[ArrayRef]) -> Result<ArrayRef> {
157 array_length_impl!(as_fixed_size_list_array(&array[0])?, array.get(1))
158}
159
160fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
162 array_length_impl!(as_generic_list_array::<O>(&array[0])?, array.get(1))
163}
164
165fn compute_array_length(
167 arr: Option<ArrayRef>,
168 dimension: Option<i64>,
169) -> Result<Option<u64>> {
170 let mut current_dimension: i64 = 1;
171 let mut value = match arr {
172 Some(arr) => arr,
173 None => return Ok(None),
174 };
175 let dimension = match dimension {
176 Some(value) => {
177 if value < 1 {
178 return Ok(None);
179 }
180
181 value
182 }
183 None => return Ok(None),
184 };
185
186 loop {
187 if current_dimension == dimension {
188 return Ok(Some(value.len() as u64));
189 }
190
191 match value.data_type() {
192 List(..) => {
193 value = downcast_arg!(value, ListArray).value(0);
194 current_dimension += 1;
195 }
196 LargeList(..) => {
197 value = downcast_arg!(value, LargeListArray).value(0);
198 current_dimension += 1;
199 }
200 FixedSizeList(_, _) => {
201 value = downcast_arg!(value, FixedSizeListArray).value(0);
202 current_dimension += 1;
203 }
204 _ => return Ok(None),
205 }
206 }
207}