datafusion_functions_nested/
length.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, FixedSizeListArray, Int64Array, LargeListArray, ListArray,
23 OffsetSizeTrait, UInt64Array,
24};
25use arrow::datatypes::{
26 DataType,
27 DataType::{FixedSizeList, LargeList, List, UInt64},
28};
29use datafusion_common::cast::{
30 as_fixed_size_list_array, as_generic_list_array, as_int64_array,
31};
32use datafusion_common::{Result, exec_err};
33use datafusion_expr::{
34 ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
35 ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
36};
37use datafusion_functions::downcast_arg;
38use datafusion_macros::user_doc;
39use std::sync::Arc;
40
// Invokes `make_udf_expr_and_func!` (defined outside this file) for `ArrayLength`.
// NOTE(review): judging by the arguments, this presumably generates an `array_length`
// expression helper taking a single `array` argument, documented by the string below,
// plus an `array_length_udf` accessor — confirm against the macro definition.
make_udf_expr_and_func!(
    ArrayLength,
    array_length,
    array,
    "returns the length of the array dimension.",
    array_length_udf
);
48
/// Scalar function `array_length`: reports the length of a list along a dimension.
///
/// The user-facing documentation is declared through the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns the length of the array dimension.",
    syntax_example = "array_length(array, dimension)",
    sql_example = r#"```sql
> select array_length([1, 2, 3, 4, 5], 1);
+-------------------------------------------+
| array_length(List([1,2,3,4,5]), 1) |
+-------------------------------------------+
| 5 |
+-------------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    ),
    argument(name = "dimension", description = "Array dimension.")
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ArrayLength {
    /// Argument signatures accepted by the function; populated in `ArrayLength::new`.
    signature: Signature,
    /// Alternative names under which the function is exposed (see `aliases()`).
    aliases: Vec<String>,
}
72
73impl Default for ArrayLength {
74 fn default() -> Self {
75 Self::new()
76 }
77}
78
79impl ArrayLength {
80 pub fn new() -> Self {
81 Self {
82 signature: Signature::one_of(
83 vec![
84 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
85 arguments: vec![ArrayFunctionArgument::Array],
86 array_coercion: None,
87 }),
88 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
89 arguments: vec![
90 ArrayFunctionArgument::Array,
91 ArrayFunctionArgument::Index,
92 ],
93 array_coercion: None,
94 }),
95 ],
96 Volatility::Immutable,
97 ),
98 aliases: vec![String::from("list_length")],
99 }
100 }
101}
102
103impl ScalarUDFImpl for ArrayLength {
104 fn name(&self) -> &str {
105 "array_length"
106 }
107
108 fn signature(&self) -> &Signature {
109 &self.signature
110 }
111
112 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
113 Ok(UInt64)
114 }
115
116 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
117 make_scalar_function(array_length_inner)(&args.args)
118 }
119
120 fn aliases(&self) -> &[String] {
121 &self.aliases
122 }
123
124 fn documentation(&self) -> Option<&Documentation> {
125 self.doc()
126 }
127}
128
// Shared body for the list-length kernels.
//
// `$array` is an already-downcast list array and `$dimension` is an optional
// reference to the dimension argument. When no dimension argument is supplied,
// dimension 1 is used for every row. Each (row, dimension) pair is handed to
// `compute_array_length` and the results are collected into a `UInt64Array`.
//
// The expansion uses `?`, so it must be invoked inside a function returning `Result`.
macro_rules! array_length_impl {
    ($array:expr, $dimension:expr) => {{
        let lists = $array;
        let dimensions = if let Some(dims) = $dimension {
            as_int64_array(dims)?.clone()
        } else {
            Int64Array::from_value(1, lists.len())
        };

        let lengths: UInt64Array = lists
            .iter()
            .zip(dimensions.iter())
            .map(|(list, dim)| compute_array_length(list, dim))
            .collect::<Result<_>>()?;

        Ok(Arc::new(lengths) as ArrayRef)
    }};
}
145
146fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
147 if args.len() != 1 && args.len() != 2 {
148 return exec_err!("array_length expects one or two arguments");
149 }
150
151 match &args[0].data_type() {
152 List(_) => general_array_length::<i32>(args),
153 LargeList(_) => general_array_length::<i64>(args),
154 FixedSizeList(_, _) => fixed_size_array_length(args),
155 array_type => exec_err!("array_length does not support type '{array_type}'"),
156 }
157}
158
159fn fixed_size_array_length(array: &[ArrayRef]) -> Result<ArrayRef> {
160 array_length_impl!(as_fixed_size_list_array(&array[0])?, array.get(1))
161}
162
163fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
165 array_length_impl!(as_generic_list_array::<O>(&array[0])?, array.get(1))
166}
167
168fn compute_array_length(
170 arr: Option<ArrayRef>,
171 dimension: Option<i64>,
172) -> Result<Option<u64>> {
173 let mut current_dimension: i64 = 1;
174 let mut value = match arr {
175 Some(arr) => arr,
176 None => return Ok(None),
177 };
178 let dimension = match dimension {
179 Some(value) => {
180 if value < 1 {
181 return Ok(None);
182 }
183
184 value
185 }
186 None => return Ok(None),
187 };
188
189 loop {
190 if current_dimension == dimension {
191 return Ok(Some(value.len() as u64));
192 }
193
194 match value.data_type() {
195 List(..) => {
196 value = downcast_arg!(value, ListArray).value(0);
197 current_dimension += 1;
198 }
199 LargeList(..) => {
200 value = downcast_arg!(value, LargeListArray).value(0);
201 current_dimension += 1;
202 }
203 FixedSizeList(_, _) => {
204 value = downcast_arg!(value, FixedSizeListArray).value(0);
205 current_dimension += 1;
206 }
207 _ => return Ok(None),
208 }
209 }
210}