datafusion_functions_nested/
length.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, Int64Array, LargeListArray, ListArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25 DataType,
26 DataType::{FixedSizeList, LargeList, List, UInt64},
27};
28use datafusion_common::cast::{as_generic_list_array, as_int64_array};
29use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result};
30use datafusion_expr::{
31 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
32};
33use datafusion_functions::{downcast_arg, downcast_named_arg};
34use datafusion_macros::user_doc;
35use std::any::Any;
36use std::sync::Arc;
37
38make_udf_expr_and_func!(
39 ArrayLength,
40 array_length,
41 array,
42 "returns the length of the array dimension.",
43 array_length_udf
44);
45
46#[user_doc(
47 doc_section(label = "Array Functions"),
48 description = "Returns the length of the array dimension.",
49 syntax_example = "array_length(array, dimension)",
50 sql_example = r#"```sql
51> select array_length([1, 2, 3, 4, 5], 1);
52+-------------------------------------------+
53| array_length(List([1,2,3,4,5]), 1) |
54+-------------------------------------------+
55| 5 |
56+-------------------------------------------+
57```"#,
58 argument(
59 name = "array",
60 description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
61 ),
62 argument(name = "dimension", description = "Array dimension.")
63)]
64#[derive(Debug)]
65pub struct ArrayLength {
66 signature: Signature,
67 aliases: Vec<String>,
68}
69
70impl Default for ArrayLength {
71 fn default() -> Self {
72 Self::new()
73 }
74}
75
76impl ArrayLength {
77 pub fn new() -> Self {
78 Self {
79 signature: Signature::variadic_any(Volatility::Immutable),
80 aliases: vec![String::from("list_length")],
81 }
82 }
83}
84
85impl ScalarUDFImpl for ArrayLength {
86 fn as_any(&self) -> &dyn Any {
87 self
88 }
89 fn name(&self) -> &str {
90 "array_length"
91 }
92
93 fn signature(&self) -> &Signature {
94 &self.signature
95 }
96
97 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
98 Ok(match arg_types[0] {
99 List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
100 _ => {
101 return plan_err!("The array_length function can only accept List/LargeList/FixedSizeList.");
102 }
103 })
104 }
105
106 fn invoke_with_args(
107 &self,
108 args: datafusion_expr::ScalarFunctionArgs,
109 ) -> Result<ColumnarValue> {
110 make_scalar_function(array_length_inner)(&args.args)
111 }
112
113 fn aliases(&self) -> &[String] {
114 &self.aliases
115 }
116
117 fn documentation(&self) -> Option<&Documentation> {
118 self.doc()
119 }
120}
121
122pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
124 if args.len() != 1 && args.len() != 2 {
125 return exec_err!("array_length expects one or two arguments");
126 }
127
128 match &args[0].data_type() {
129 List(_) => general_array_length::<i32>(args),
130 LargeList(_) => general_array_length::<i64>(args),
131 array_type => exec_err!("array_length does not support type '{array_type:?}'"),
132 }
133}
134
135fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) -> Result<ArrayRef> {
137 let list_array = as_generic_list_array::<O>(&array[0])?;
138 let dimension = if array.len() == 2 {
139 as_int64_array(&array[1])?.clone()
140 } else {
141 Int64Array::from_value(1, list_array.len())
142 };
143
144 let result = list_array
145 .iter()
146 .zip(dimension.iter())
147 .map(|(arr, dim)| compute_array_length(arr, dim))
148 .collect::<Result<UInt64Array>>()?;
149
150 Ok(Arc::new(result) as ArrayRef)
151}
152
153fn compute_array_length(
155 arr: Option<ArrayRef>,
156 dimension: Option<i64>,
157) -> Result<Option<u64>> {
158 let mut current_dimension: i64 = 1;
159 let mut value = match arr {
160 Some(arr) => arr,
161 None => return Ok(None),
162 };
163 let dimension = match dimension {
164 Some(value) => {
165 if value < 1 {
166 return Ok(None);
167 }
168
169 value
170 }
171 None => return Ok(None),
172 };
173
174 loop {
175 if current_dimension == dimension {
176 return Ok(Some(value.len() as u64));
177 }
178
179 match value.data_type() {
180 List(..) => {
181 value = downcast_arg!(value, ListArray).value(0);
182 current_dimension += 1;
183 }
184 LargeList(..) => {
185 value = downcast_arg!(value, LargeListArray).value(0);
186 current_dimension += 1;
187 }
188 _ => return Ok(None),
189 }
190 }
191}