datafusion_functions_nested/
dimension.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_dims and array_ndims functions.
19
20use arrow::array::{Array, ArrayRef, ListArray, UInt64Array};
21use arrow::datatypes::{
22    DataType,
23    DataType::{FixedSizeList, LargeList, List, Null, UInt64},
24    UInt64Type,
25};
26use std::any::Any;
27
28use datafusion_common::cast::{
29    as_fixed_size_list_array, as_large_list_array, as_list_array,
30};
31use datafusion_common::{exec_err, utils::take_function_args, Result};
32
33use crate::utils::{compute_array_dims, make_scalar_function};
34use datafusion_common::utils::list_ndims;
35use datafusion_expr::{
36    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
37};
38use datafusion_macros::user_doc;
39use itertools::Itertools;
40use std::sync::Arc;
41
// Wires the `ArrayDims` UDF into the crate through `make_udf_expr_and_func!`.
// NOTE(review): the macro is defined elsewhere in the crate; presumably it
// generates the `array_dims(array)` expression helper (documented with the
// string below) and the `array_dims_udf()` accessor — confirm against the
// macro definition.
make_udf_expr_and_func!(
    ArrayDims,
    array_dims,
    array,
    "returns an array of the array's dimensions.",
    array_dims_udf
);
49
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns an array of the array's dimensions.",
    syntax_example = "array_dims(array)",
    sql_example = r#"```sql
> select array_dims([[1, 2, 3], [4, 5, 6]]);
+---------------------------------+
| array_dims(List([1,2,3,4,5,6])) |
+---------------------------------+
| [2, 3]                          |
+---------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
/// Scalar UDF implementation behind the `array_dims` SQL function
/// (also registered under the `list_dims` alias).
#[derive(Debug)]
pub struct ArrayDims {
    /// Accepted argument signature, returned from `ScalarUDFImpl::signature`.
    signature: Signature,
    /// Alternative names, returned from `ScalarUDFImpl::aliases`.
    aliases: Vec<String>,
}
72
73impl Default for ArrayDims {
74    fn default() -> Self {
75        Self::new()
76    }
77}
78
79impl ArrayDims {
80    pub fn new() -> Self {
81        Self {
82            signature: Signature::arrays(1, None, Volatility::Immutable),
83            aliases: vec!["list_dims".to_string()],
84        }
85    }
86}
87
88impl ScalarUDFImpl for ArrayDims {
89    fn as_any(&self) -> &dyn Any {
90        self
91    }
92    fn name(&self) -> &str {
93        "array_dims"
94    }
95
96    fn signature(&self) -> &Signature {
97        &self.signature
98    }
99
100    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
101        Ok(DataType::new_list(UInt64, true))
102    }
103
104    fn invoke_with_args(
105        &self,
106        args: datafusion_expr::ScalarFunctionArgs,
107    ) -> Result<ColumnarValue> {
108        make_scalar_function(array_dims_inner)(&args.args)
109    }
110
111    fn aliases(&self) -> &[String] {
112        &self.aliases
113    }
114
115    fn documentation(&self) -> Option<&Documentation> {
116        self.doc()
117    }
118}
119
// Wires the `ArrayNdims` UDF into the crate through `make_udf_expr_and_func!`.
// NOTE(review): the macro is defined elsewhere in the crate; presumably it
// generates the `array_ndims(array)` expression helper (documented with the
// string below) and the `array_ndims_udf()` accessor — confirm against the
// macro definition.
make_udf_expr_and_func!(
    ArrayNdims,
    array_ndims,
    array,
    "returns the number of dimensions of the array.",
    array_ndims_udf
);
127
128#[user_doc(
129    doc_section(label = "Array Functions"),
130    description = "Returns the number of dimensions of the array.",
131    syntax_example = "array_ndims(array, element)",
132    sql_example = r#"```sql
133> select array_ndims([[1, 2, 3], [4, 5, 6]]);
134+----------------------------------+
135| array_ndims(List([1,2,3,4,5,6])) |
136+----------------------------------+
137| 2                                |
138+----------------------------------+
139```"#,
140    argument(
141        name = "array",
142        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
143    ),
144    argument(name = "element", description = "Array element.")
145)]
146#[derive(Debug)]
147pub(super) struct ArrayNdims {
148    signature: Signature,
149    aliases: Vec<String>,
150}
151impl ArrayNdims {
152    pub fn new() -> Self {
153        Self {
154            signature: Signature::arrays(1, None, Volatility::Immutable),
155            aliases: vec![String::from("list_ndims")],
156        }
157    }
158}
159
160impl ScalarUDFImpl for ArrayNdims {
161    fn as_any(&self) -> &dyn Any {
162        self
163    }
164    fn name(&self) -> &str {
165        "array_ndims"
166    }
167
168    fn signature(&self) -> &Signature {
169        &self.signature
170    }
171
172    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
173        Ok(UInt64)
174    }
175
176    fn invoke_with_args(
177        &self,
178        args: datafusion_expr::ScalarFunctionArgs,
179    ) -> Result<ColumnarValue> {
180        make_scalar_function(array_ndims_inner)(&args.args)
181    }
182
183    fn aliases(&self) -> &[String] {
184        &self.aliases
185    }
186
187    fn documentation(&self) -> Option<&Documentation> {
188        self.doc()
189    }
190}
191
192/// Array_dims SQL function
193pub fn array_dims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
194    let [array] = take_function_args("array_dims", args)?;
195    let data: Vec<_> = match array.data_type() {
196        List(_) => as_list_array(&array)?
197            .iter()
198            .map(compute_array_dims)
199            .try_collect()?,
200        LargeList(_) => as_large_list_array(&array)?
201            .iter()
202            .map(compute_array_dims)
203            .try_collect()?,
204        FixedSizeList(..) => as_fixed_size_list_array(&array)?
205            .iter()
206            .map(compute_array_dims)
207            .try_collect()?,
208        arg_type => {
209            return exec_err!("array_dims does not support type {arg_type}");
210        }
211    };
212
213    let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
214    Ok(Arc::new(result))
215}
216
217/// Array_ndims SQL function
218pub fn array_ndims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
219    let [array] = take_function_args("array_ndims", args)?;
220
221    fn general_list_ndims(array: &ArrayRef) -> Result<ArrayRef> {
222        let ndims = list_ndims(array.data_type());
223        let data = vec![ndims; array.len()];
224        let result = UInt64Array::new(data.into(), array.nulls().cloned());
225        Ok(Arc::new(result))
226    }
227
228    match array.data_type() {
229        Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
230        List(_) | LargeList(_) | FixedSizeList(..) => general_list_ndims(array),
231        arg_type => exec_err!("array_ndims does not support type {arg_type}"),
232    }
233}