datafusion_functions_nested/
length.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, FixedSizeListArray, Int64Array, LargeListArray, ListArray,
23 OffsetSizeTrait, UInt64Array,
24};
25use arrow::datatypes::{
26 DataType,
27 DataType::{FixedSizeList, LargeList, List, UInt64},
28};
29use datafusion_common::cast::{
30 as_fixed_size_list_array, as_generic_list_array, as_int64_array,
31};
32use datafusion_common::{Result, exec_err};
33use datafusion_expr::{
34 ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
35 ScalarUDFImpl, Signature, TypeSignature, Volatility,
36};
37use datafusion_functions::downcast_arg;
38use datafusion_macros::user_doc;
39use std::any::Any;
40use std::sync::Arc;
41
// Invokes the crate's `make_udf_expr_and_func!` helper macro for `ArrayLength`.
// NOTE(review): the macro is defined outside this file; judging by its
// arguments it presumably generates an `array_length(array)` expression
// builder (documented with the string below) and an `array_length_udf`
// accessor — confirm against the macro definition.
make_udf_expr_and_func!(
    ArrayLength,
    array_length,
    array,
    "returns the length of the array dimension.",
    array_length_udf
);
49
50#[user_doc(
51 doc_section(label = "Array Functions"),
52 description = "Returns the length of the array dimension.",
53 syntax_example = "array_length(array, dimension)",
54 sql_example = r#"```sql
55> select array_length([1, 2, 3, 4, 5], 1);
56+-------------------------------------------+
57| array_length(List([1,2,3,4,5]), 1) |
58+-------------------------------------------+
59| 5 |
60+-------------------------------------------+
61```"#,
62 argument(
63 name = "array",
64 description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
65 ),
66 argument(name = "dimension", description = "Array dimension.")
67)]
68#[derive(Debug, PartialEq, Eq, Hash)]
69pub struct ArrayLength {
70 signature: Signature,
71 aliases: Vec<String>,
72}
73
74impl Default for ArrayLength {
75 fn default() -> Self {
76 Self::new()
77 }
78}
79
80impl ArrayLength {
81 pub fn new() -> Self {
82 Self {
83 signature: Signature::one_of(
84 vec![
85 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
86 arguments: vec![ArrayFunctionArgument::Array],
87 array_coercion: None,
88 }),
89 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
90 arguments: vec![
91 ArrayFunctionArgument::Array,
92 ArrayFunctionArgument::Index,
93 ],
94 array_coercion: None,
95 }),
96 ],
97 Volatility::Immutable,
98 ),
99 aliases: vec![String::from("list_length")],
100 }
101 }
102}
103
104impl ScalarUDFImpl for ArrayLength {
105 fn as_any(&self) -> &dyn Any {
106 self
107 }
108 fn name(&self) -> &str {
109 "array_length"
110 }
111
112 fn signature(&self) -> &Signature {
113 &self.signature
114 }
115
116 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
117 Ok(UInt64)
118 }
119
120 fn invoke_with_args(
121 &self,
122 args: datafusion_expr::ScalarFunctionArgs,
123 ) -> Result<ColumnarValue> {
124 make_scalar_function(array_length_inner)(&args.args)
125 }
126
127 fn aliases(&self) -> &[String] {
128 &self.aliases
129 }
130
131 fn documentation(&self) -> Option<&Documentation> {
132 self.doc()
133 }
134}
135
/// Shared body of the `array_length` kernels.
///
/// `$array` is an already-downcast list array and `$dimension` an optional
/// reference to the dimension argument. Expands to an expression of type
/// `Result<ArrayRef>` holding one `UInt64` length (or null) per input row.
macro_rules! array_length_impl {
    ($array:expr, $dimension:expr) => {{
        let lists = $array;
        // Without an explicit dimension argument every row measures dimension 1.
        let dims = if let Some(dim_arg) = $dimension {
            as_int64_array(dim_arg)?.clone()
        } else {
            Int64Array::from_value(1, lists.len())
        };

        // Pair each row with its dimension; the first error aborts the collect.
        let per_row = lists
            .iter()
            .zip(dims.iter())
            .map(|(row, dim)| compute_array_length(row, dim));
        let lengths: UInt64Array = per_row.collect::<Result<_>>()?;

        Ok(Arc::new(lengths) as ArrayRef)
    }};
}
152
153fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
154 if args.len() != 1 && args.len() != 2 {
155 return exec_err!("array_length expects one or two arguments");
156 }
157
158 match &args[0].data_type() {
159 List(_) => general_array_length::<i32>(args),
160 LargeList(_) => general_array_length::<i64>(args),
161 FixedSizeList(_, _) => fixed_size_array_length(args),
162 array_type => exec_err!("array_length does not support type '{array_type}'"),
163 }
164}
165
166fn fixed_size_array_length(array: &[ArrayRef]) -> Result<ArrayRef> {
167 array_length_impl!(as_fixed_size_list_array(&array[0])?, array.get(1))
168}
169
170fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
172 array_length_impl!(as_generic_list_array::<O>(&array[0])?, array.get(1))
173}
174
175fn compute_array_length(
177 arr: Option<ArrayRef>,
178 dimension: Option<i64>,
179) -> Result<Option<u64>> {
180 let mut current_dimension: i64 = 1;
181 let mut value = match arr {
182 Some(arr) => arr,
183 None => return Ok(None),
184 };
185 let dimension = match dimension {
186 Some(value) => {
187 if value < 1 {
188 return Ok(None);
189 }
190
191 value
192 }
193 None => return Ok(None),
194 };
195
196 loop {
197 if current_dimension == dimension {
198 return Ok(Some(value.len() as u64));
199 }
200
201 match value.data_type() {
202 List(..) => {
203 value = downcast_arg!(value, ListArray).value(0);
204 current_dimension += 1;
205 }
206 LargeList(..) => {
207 value = downcast_arg!(value, LargeListArray).value(0);
208 current_dimension += 1;
209 }
210 FixedSizeList(_, _) => {
211 value = downcast_arg!(value, FixedSizeListArray).value(0);
212 current_dimension += 1;
213 }
214 _ => return Ok(None),
215 }
216 }
217}