datafusion_functions_nested/
length.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, FixedSizeListArray, Int64Array, LargeListArray, ListArray,
23 OffsetSizeTrait, UInt64Array,
24};
25use arrow::datatypes::{
26 DataType,
27 DataType::{FixedSizeList, LargeList, List, UInt64},
28};
29use datafusion_common::cast::{
30 as_fixed_size_list_array, as_generic_list_array, as_int64_array,
31};
32use datafusion_common::{exec_err, Result};
33use datafusion_expr::{
34 ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
35 ScalarUDFImpl, Signature, TypeSignature, Volatility,
36};
37use datafusion_functions::downcast_arg;
38use datafusion_macros::user_doc;
39use std::any::Any;
40use std::sync::Arc;
41
// Registers `ArrayLength` through the crate's `make_udf_expr_and_func!` macro.
// NOTE(review): the macro is defined outside this file; presumably it generates an
// `array_length` expression helper taking `array` and an `array_length_udf`
// accessor, using the string below as the helper's description — confirm against
// the macro definition.
make_udf_expr_and_func!(
    ArrayLength,
    array_length,
    array,
    "returns the length of the array dimension.",
    array_length_udf
);
49
// Scalar UDF implementing `array_length` (alias `list_length`). The `user_doc`
// attribute below carries the user-facing documentation that `documentation()`
// exposes through `self.doc()`.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns the length of the array dimension.",
    syntax_example = "array_length(array, dimension)",
    sql_example = r#"```sql
> select array_length([1, 2, 3, 4, 5], 1);
+-------------------------------------------+
| array_length(List([1,2,3,4,5]), 1) |
+-------------------------------------------+
| 5 |
+-------------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    ),
    argument(name = "dimension", description = "Array dimension.")
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ArrayLength {
    // Accepted argument shapes: `(array)` or `(array, index)`; built in `new()`.
    signature: Signature,
    // Alternative names the function is exposed under (currently `list_length`).
    aliases: Vec<String>,
}
73
74impl Default for ArrayLength {
75 fn default() -> Self {
76 Self::new()
77 }
78}
79
80impl ArrayLength {
81 pub fn new() -> Self {
82 Self {
83 signature: Signature::one_of(
84 vec![
85 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
86 arguments: vec![ArrayFunctionArgument::Array],
87 array_coercion: None,
88 }),
89 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
90 arguments: vec![
91 ArrayFunctionArgument::Array,
92 ArrayFunctionArgument::Index,
93 ],
94 array_coercion: None,
95 }),
96 ],
97 Volatility::Immutable,
98 ),
99 aliases: vec![String::from("list_length")],
100 }
101 }
102}
103
104impl ScalarUDFImpl for ArrayLength {
105 fn as_any(&self) -> &dyn Any {
106 self
107 }
108 fn name(&self) -> &str {
109 "array_length"
110 }
111
112 fn signature(&self) -> &Signature {
113 &self.signature
114 }
115
116 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
117 Ok(UInt64)
118 }
119
120 fn invoke_with_args(
121 &self,
122 args: datafusion_expr::ScalarFunctionArgs,
123 ) -> Result<ColumnarValue> {
124 make_scalar_function(array_length_inner)(&args.args)
125 }
126
127 fn aliases(&self) -> &[String] {
128 &self.aliases
129 }
130
131 fn documentation(&self) -> Option<&Documentation> {
132 self.doc()
133 }
134}
135
// Shared body for the list-typed `array_length` kernels.
//
// `$array` is a list-like array whose `iter()` yields `Option<ArrayRef>` per row;
// `$dimension` is an `Option` holding the dimension argument, if one was passed.
// When no dimension is given, every row uses dimension 1. Evaluates to
// `Result<ArrayRef>` wrapping a `UInt64Array` of per-row lengths.
macro_rules! array_length_impl {
    ($array:expr, $dimension:expr) => {{
        let lists = $array;
        let dims = if let Some(d) = $dimension {
            as_int64_array(d)?.clone()
        } else {
            // No explicit dimension: default to the outermost one for every row.
            Int64Array::from_value(1, lists.len())
        };

        let lengths = lists
            .iter()
            .zip(dims.iter())
            .map(|(list, dim)| compute_array_length(list, dim))
            .collect::<Result<UInt64Array>>()?;

        Ok(Arc::new(lengths) as ArrayRef)
    }};
}
152
153pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
155 if args.len() != 1 && args.len() != 2 {
156 return exec_err!("array_length expects one or two arguments");
157 }
158
159 match &args[0].data_type() {
160 List(_) => general_array_length::<i32>(args),
161 LargeList(_) => general_array_length::<i64>(args),
162 FixedSizeList(_, _) => fixed_size_array_length(args),
163 array_type => exec_err!("array_length does not support type '{array_type}'"),
164 }
165}
166
167fn fixed_size_array_length(array: &[ArrayRef]) -> Result<ArrayRef> {
168 array_length_impl!(as_fixed_size_list_array(&array[0])?, array.get(1))
169}
170
171fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
173 array_length_impl!(as_generic_list_array::<O>(&array[0])?, array.get(1))
174}
175
176fn compute_array_length(
178 arr: Option<ArrayRef>,
179 dimension: Option<i64>,
180) -> Result<Option<u64>> {
181 let mut current_dimension: i64 = 1;
182 let mut value = match arr {
183 Some(arr) => arr,
184 None => return Ok(None),
185 };
186 let dimension = match dimension {
187 Some(value) => {
188 if value < 1 {
189 return Ok(None);
190 }
191
192 value
193 }
194 None => return Ok(None),
195 };
196
197 loop {
198 if current_dimension == dimension {
199 return Ok(Some(value.len() as u64));
200 }
201
202 match value.data_type() {
203 List(..) => {
204 value = downcast_arg!(value, ListArray).value(0);
205 current_dimension += 1;
206 }
207 LargeList(..) => {
208 value = downcast_arg!(value, LargeListArray).value(0);
209 current_dimension += 1;
210 }
211 FixedSizeList(_, _) => {
212 value = downcast_arg!(value, FixedSizeListArray).value(0);
213 current_dimension += 1;
214 }
215 _ => return Ok(None),
216 }
217 }
218}