datafusion_functions_nested/
cardinality.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for cardinality function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25    DataType,
26    DataType::{FixedSizeList, LargeList, List, Map, UInt64},
27};
28use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
29use datafusion_common::utils::take_function_args;
30use datafusion_common::Result;
31use datafusion_common::{exec_err, plan_err};
32use datafusion_expr::{
33    ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
34    ScalarUDFImpl, Signature, TypeSignature, Volatility,
35};
36use datafusion_macros::user_doc;
37use std::any::Any;
38use std::sync::Arc;
39
// Registers the `Cardinality` UDF with the crate's helper macro.
// NOTE(review): the macro is defined elsewhere in the crate; presumably it
// generates the `cardinality(array)` expression builder and the
// `cardinality_udf()` accessor named below — confirm against the macro source.
make_udf_expr_and_func!(
    Cardinality,
    cardinality,
    array,
    "returns the total number of elements in the array or map.",
    cardinality_udf
);
47
48impl Cardinality {
49    pub fn new() -> Self {
50        Self {
51            signature: Signature::one_of(
52                vec![
53                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
54                        arguments: vec![ArrayFunctionArgument::Array],
55                        array_coercion: None,
56                    }),
57                    TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
58                ],
59                Volatility::Immutable,
60            ),
61            aliases: vec![],
62        }
63    }
64}
65
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns the total number of elements in the array.",
    syntax_example = "cardinality(array)",
    sql_example = r#"```sql
> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
+--------------------------------------+
| cardinality(List([1,2,3,4,5,6,7,8])) |
+--------------------------------------+
| 8                                    |
+--------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
/// Scalar UDF implementing the SQL `cardinality` function.
///
/// The user-facing documentation is supplied through the `user_doc`
/// attribute above.
#[derive(Debug)]
pub struct Cardinality {
    /// Accepted argument signatures, returned by `ScalarUDFImpl::signature`.
    signature: Signature,
    /// Alternative names for the function, returned by `ScalarUDFImpl::aliases`.
    aliases: Vec<String>,
}
88
89impl Default for Cardinality {
90    fn default() -> Self {
91        Self::new()
92    }
93}
94impl ScalarUDFImpl for Cardinality {
95    fn as_any(&self) -> &dyn Any {
96        self
97    }
98    fn name(&self) -> &str {
99        "cardinality"
100    }
101
102    fn signature(&self) -> &Signature {
103        &self.signature
104    }
105
106    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
107        Ok(match arg_types[0] {
108            List(_) | LargeList(_) | FixedSizeList(_, _) | Map(_, _) => UInt64,
109            _ => {
110                return plan_err!("The cardinality function can only accept List/LargeList/FixedSizeList/Map.");
111            }
112        })
113    }
114
115    fn invoke_with_args(
116        &self,
117        args: datafusion_expr::ScalarFunctionArgs,
118    ) -> Result<ColumnarValue> {
119        make_scalar_function(cardinality_inner)(&args.args)
120    }
121
122    fn aliases(&self) -> &[String] {
123        &self.aliases
124    }
125
126    fn documentation(&self) -> Option<&Documentation> {
127        self.doc()
128    }
129}
130
131/// Cardinality SQL function
132pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
133    let [array] = take_function_args("cardinality", args)?;
134    match &array.data_type() {
135        List(_) => {
136            let list_array = as_list_array(&array)?;
137            generic_list_cardinality::<i32>(list_array)
138        }
139        LargeList(_) => {
140            let list_array = as_large_list_array(&array)?;
141            generic_list_cardinality::<i64>(list_array)
142        }
143        Map(_, _) => {
144            let map_array = as_map_array(&array)?;
145            generic_map_cardinality(map_array)
146        }
147        other => {
148            exec_err!("cardinality does not support type '{:?}'", other)
149        }
150    }
151}
152
153fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
154    let result: UInt64Array = array
155        .iter()
156        .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
157        .collect();
158    Ok(Arc::new(result))
159}
160
161fn generic_list_cardinality<O: OffsetSizeTrait>(
162    array: &GenericListArray<O>,
163) -> Result<ArrayRef> {
164    let result = array
165        .iter()
166        .map(|arr| match crate::utils::compute_array_dims(arr)? {
167            Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
168            None => Ok(None),
169        })
170        .collect::<Result<UInt64Array>>()?;
171    Ok(Arc::new(result) as ArrayRef)
172}