datafusion_functions_nested/
dimension.rs
1use arrow::array::{Array, ArrayRef, ListArray, UInt64Array};
21use arrow::datatypes::{
22 DataType,
23 DataType::{FixedSizeList, LargeList, List, Null, UInt64},
24 UInt64Type,
25};
26use std::any::Any;
27
28use datafusion_common::cast::{
29 as_fixed_size_list_array, as_large_list_array, as_list_array,
30};
31use datafusion_common::{exec_err, utils::take_function_args, Result};
32
33use crate::utils::{compute_array_dims, make_scalar_function};
34use datafusion_common::utils::list_ndims;
35use datafusion_expr::{
36 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
37};
38use datafusion_macros::user_doc;
39use itertools::Itertools;
40use std::sync::Arc;
41
42make_udf_expr_and_func!(
43 ArrayDims,
44 array_dims,
45 array,
46 "returns an array of the array's dimensions.",
47 array_dims_udf
48);
49
50#[user_doc(
51 doc_section(label = "Array Functions"),
52 description = "Returns an array of the array's dimensions.",
53 syntax_example = "array_dims(array)",
54 sql_example = r#"```sql
55> select array_dims([[1, 2, 3], [4, 5, 6]]);
56+---------------------------------+
57| array_dims(List([1,2,3,4,5,6])) |
58+---------------------------------+
59| [2, 3] |
60+---------------------------------+
61```"#,
62 argument(
63 name = "array",
64 description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
65 )
66)]
67#[derive(Debug)]
68pub struct ArrayDims {
69 signature: Signature,
70 aliases: Vec<String>,
71}
72
73impl Default for ArrayDims {
74 fn default() -> Self {
75 Self::new()
76 }
77}
78
79impl ArrayDims {
80 pub fn new() -> Self {
81 Self {
82 signature: Signature::arrays(1, None, Volatility::Immutable),
83 aliases: vec!["list_dims".to_string()],
84 }
85 }
86}
87
88impl ScalarUDFImpl for ArrayDims {
89 fn as_any(&self) -> &dyn Any {
90 self
91 }
92 fn name(&self) -> &str {
93 "array_dims"
94 }
95
96 fn signature(&self) -> &Signature {
97 &self.signature
98 }
99
100 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
101 Ok(DataType::new_list(UInt64, true))
102 }
103
104 fn invoke_with_args(
105 &self,
106 args: datafusion_expr::ScalarFunctionArgs,
107 ) -> Result<ColumnarValue> {
108 make_scalar_function(array_dims_inner)(&args.args)
109 }
110
111 fn aliases(&self) -> &[String] {
112 &self.aliases
113 }
114
115 fn documentation(&self) -> Option<&Documentation> {
116 self.doc()
117 }
118}
119
120make_udf_expr_and_func!(
121 ArrayNdims,
122 array_ndims,
123 array,
124 "returns the number of dimensions of the array.",
125 array_ndims_udf
126);
127
128#[user_doc(
129 doc_section(label = "Array Functions"),
130 description = "Returns the number of dimensions of the array.",
131 syntax_example = "array_ndims(array, element)",
132 sql_example = r#"```sql
133> select array_ndims([[1, 2, 3], [4, 5, 6]]);
134+----------------------------------+
135| array_ndims(List([1,2,3,4,5,6])) |
136+----------------------------------+
137| 2 |
138+----------------------------------+
139```"#,
140 argument(
141 name = "array",
142 description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
143 ),
144 argument(name = "element", description = "Array element.")
145)]
146#[derive(Debug)]
147pub(super) struct ArrayNdims {
148 signature: Signature,
149 aliases: Vec<String>,
150}
151impl ArrayNdims {
152 pub fn new() -> Self {
153 Self {
154 signature: Signature::arrays(1, None, Volatility::Immutable),
155 aliases: vec![String::from("list_ndims")],
156 }
157 }
158}
159
160impl ScalarUDFImpl for ArrayNdims {
161 fn as_any(&self) -> &dyn Any {
162 self
163 }
164 fn name(&self) -> &str {
165 "array_ndims"
166 }
167
168 fn signature(&self) -> &Signature {
169 &self.signature
170 }
171
172 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
173 Ok(UInt64)
174 }
175
176 fn invoke_with_args(
177 &self,
178 args: datafusion_expr::ScalarFunctionArgs,
179 ) -> Result<ColumnarValue> {
180 make_scalar_function(array_ndims_inner)(&args.args)
181 }
182
183 fn aliases(&self) -> &[String] {
184 &self.aliases
185 }
186
187 fn documentation(&self) -> Option<&Documentation> {
188 self.doc()
189 }
190}
191
192pub fn array_dims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
194 let [array] = take_function_args("array_dims", args)?;
195 let data: Vec<_> = match array.data_type() {
196 List(_) => as_list_array(&array)?
197 .iter()
198 .map(compute_array_dims)
199 .try_collect()?,
200 LargeList(_) => as_large_list_array(&array)?
201 .iter()
202 .map(compute_array_dims)
203 .try_collect()?,
204 FixedSizeList(..) => as_fixed_size_list_array(&array)?
205 .iter()
206 .map(compute_array_dims)
207 .try_collect()?,
208 arg_type => {
209 return exec_err!("array_dims does not support type {arg_type}");
210 }
211 };
212
213 let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
214 Ok(Arc::new(result))
215}
216
217pub fn array_ndims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
219 let [array] = take_function_args("array_ndims", args)?;
220
221 fn general_list_ndims(array: &ArrayRef) -> Result<ArrayRef> {
222 let ndims = list_ndims(array.data_type());
223 let data = vec![ndims; array.len()];
224 let result = UInt64Array::new(data.into(), array.nulls().cloned());
225 Ok(Arc::new(result))
226 }
227
228 match array.data_type() {
229 Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
230 List(_) | LargeList(_) | FixedSizeList(..) => general_list_ndims(array),
231 arg_type => exec_err!("array_ndims does not support type {arg_type}"),
232 }
233}