Skip to main content

datafusion_functions_nested/
dimension.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for array_dims and array_ndims functions.
19
20use arrow::array::{Array, ArrayRef, ListArray, UInt64Array};
21use arrow::datatypes::{
22    DataType,
23    DataType::{FixedSizeList, LargeList, List, Null, UInt64},
24    UInt64Type,
25};
26
27use datafusion_common::cast::{
28    as_fixed_size_list_array, as_large_list_array, as_list_array,
29};
30use datafusion_common::{Result, exec_err, utils::take_function_args};
31
32use crate::utils::{compute_array_dims, make_scalar_function};
33use datafusion_common::utils::list_ndims;
34use datafusion_expr::{
35    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
36    Volatility,
37};
38use datafusion_macros::user_doc;
39use itertools::Itertools;
40use std::sync::Arc;
41
// Invokes the crate's `make_udf_expr_and_func!` macro for `ArrayDims`, passing the
// expression-function name `array_dims`, its single argument `array`, a description
// string, and the name `array_dims_udf`.
// NOTE(review): the macro is defined outside this file; presumably it generates the
// `array_dims(array)` expression builder and an `array_dims_udf()` accessor — confirm.
make_udf_expr_and_func!(
    ArrayDims,
    array_dims,
    array,
    "returns an array of the array's dimensions.",
    array_dims_udf
);
49
// SQL-facing documentation for `array_dims` (section, description, syntax, example and
// argument list). `ArrayDims::documentation` returns `self.doc()`, which is presumably
// generated by this attribute — confirm against the `user_doc` macro.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns an array of the array's dimensions.",
    syntax_example = "array_dims(array)",
    sql_example = r#"```sql
> select array_dims([[1, 2, 3], [4, 5, 6]]);
+---------------------------------+
| array_dims(List([1,2,3,4,5,6])) |
+---------------------------------+
| [2, 3]                          |
+---------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
/// Scalar UDF implementation backing the `array_dims` SQL function
/// (also registered under the alias `list_dims`).
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ArrayDims {
    /// Accepted argument signature, exposed through `ScalarUDFImpl::signature`.
    signature: Signature,
    /// Alternative function names, exposed through `ScalarUDFImpl::aliases`.
    aliases: Vec<String>,
}
72
73impl Default for ArrayDims {
74    fn default() -> Self {
75        Self::new()
76    }
77}
78
79impl ArrayDims {
80    pub fn new() -> Self {
81        Self {
82            signature: Signature::arrays(1, None, Volatility::Immutable),
83            aliases: vec!["list_dims".to_string()],
84        }
85    }
86}
87
88impl ScalarUDFImpl for ArrayDims {
89    fn name(&self) -> &str {
90        "array_dims"
91    }
92
93    fn signature(&self) -> &Signature {
94        &self.signature
95    }
96
97    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
98        Ok(DataType::new_list(UInt64, true))
99    }
100
101    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
102        make_scalar_function(array_dims_inner)(&args.args)
103    }
104
105    fn aliases(&self) -> &[String] {
106        &self.aliases
107    }
108
109    fn documentation(&self) -> Option<&Documentation> {
110        self.doc()
111    }
112}
113
// Invokes the crate's `make_udf_expr_and_func!` macro for `ArrayNdims`, passing the
// expression-function name `array_ndims`, its single argument `array`, a description
// string, and the name `array_ndims_udf`.
// NOTE(review): the macro is defined outside this file; presumably it generates the
// `array_ndims(array)` expression builder and an `array_ndims_udf()` accessor — confirm.
make_udf_expr_and_func!(
    ArrayNdims,
    array_ndims,
    array,
    "returns the number of dimensions of the array.",
    array_ndims_udf
);
121
122#[user_doc(
123    doc_section(label = "Array Functions"),
124    description = "Returns the number of dimensions of the array.",
125    syntax_example = "array_ndims(array, element)",
126    sql_example = r#"```sql
127> select array_ndims([[1, 2, 3], [4, 5, 6]]);
128+----------------------------------+
129| array_ndims(List([1,2,3,4,5,6])) |
130+----------------------------------+
131| 2                                |
132+----------------------------------+
133```"#,
134    argument(
135        name = "array",
136        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
137    ),
138    argument(name = "element", description = "Array element.")
139)]
140#[derive(Debug, PartialEq, Eq, Hash)]
141pub(super) struct ArrayNdims {
142    signature: Signature,
143    aliases: Vec<String>,
144}
145impl ArrayNdims {
146    pub fn new() -> Self {
147        Self {
148            signature: Signature::arrays(1, None, Volatility::Immutable),
149            aliases: vec![String::from("list_ndims")],
150        }
151    }
152}
153
154impl ScalarUDFImpl for ArrayNdims {
155    fn name(&self) -> &str {
156        "array_ndims"
157    }
158
159    fn signature(&self) -> &Signature {
160        &self.signature
161    }
162
163    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
164        Ok(UInt64)
165    }
166
167    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
168        make_scalar_function(array_ndims_inner)(&args.args)
169    }
170
171    fn aliases(&self) -> &[String] {
172        &self.aliases
173    }
174
175    fn documentation(&self) -> Option<&Documentation> {
176        self.doc()
177    }
178}
179
180fn array_dims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
181    let [array] = take_function_args("array_dims", args)?;
182    let data: Vec<_> = match array.data_type() {
183        List(_) => as_list_array(&array)?
184            .iter()
185            .map(compute_array_dims)
186            .try_collect()?,
187        LargeList(_) => as_large_list_array(&array)?
188            .iter()
189            .map(compute_array_dims)
190            .try_collect()?,
191        FixedSizeList(..) => as_fixed_size_list_array(&array)?
192            .iter()
193            .map(compute_array_dims)
194            .try_collect()?,
195        arg_type => {
196            return exec_err!("array_dims does not support type {arg_type}");
197        }
198    };
199
200    let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
201    Ok(Arc::new(result))
202}
203
204fn array_ndims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
205    let [array] = take_function_args("array_ndims", args)?;
206
207    fn general_list_ndims(array: &ArrayRef) -> Result<ArrayRef> {
208        let ndims = list_ndims(array.data_type());
209        let data = vec![ndims; array.len()];
210        let result = UInt64Array::new(data.into(), array.nulls().cloned());
211        Ok(Arc::new(result))
212    }
213
214    match array.data_type() {
215        Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
216        List(_) | LargeList(_) | FixedSizeList(..) => general_list_ndims(array),
217        arg_type => exec_err!("array_ndims does not support type {arg_type}"),
218    }
219}