datafusion_functions_nested/
cardinality.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for cardinality function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25    DataType,
26    DataType::{LargeList, List, Map, Null, UInt64},
27};
28use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
29use datafusion_common::exec_err;
30use datafusion_common::utils::{take_function_args, ListCoercion};
31use datafusion_common::Result;
32use datafusion_expr::{
33    ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
34    ScalarUDFImpl, Signature, TypeSignature, Volatility,
35};
36use datafusion_macros::user_doc;
37use std::any::Any;
38use std::sync::Arc;
39
40make_udf_expr_and_func!(
41    Cardinality,
42    cardinality,
43    array,
44    "returns the total number of elements in the array or map.",
45    cardinality_udf
46);
47
48impl Cardinality {
49    pub fn new() -> Self {
50        Self {
51            signature: Signature::one_of(
52                vec![
53                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
54                        arguments: vec![ArrayFunctionArgument::Array],
55                        array_coercion: Some(ListCoercion::FixedSizedListToList),
56                    }),
57                    TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
58                ],
59                Volatility::Immutable,
60            ),
61        }
62    }
63}
64
65#[user_doc(
66    doc_section(label = "Array Functions"),
67    description = "Returns the total number of elements in the array.",
68    syntax_example = "cardinality(array)",
69    sql_example = r#"```sql
70> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
71+--------------------------------------+
72| cardinality(List([1,2,3,4,5,6,7,8])) |
73+--------------------------------------+
74| 8                                    |
75+--------------------------------------+
76```"#,
77    argument(
78        name = "array",
79        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
80    )
81)]
82#[derive(Debug, PartialEq, Eq, Hash)]
83pub struct Cardinality {
84    signature: Signature,
85}
86
87impl Default for Cardinality {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92impl ScalarUDFImpl for Cardinality {
93    fn as_any(&self) -> &dyn Any {
94        self
95    }
96    fn name(&self) -> &str {
97        "cardinality"
98    }
99
100    fn signature(&self) -> &Signature {
101        &self.signature
102    }
103
104    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
105        Ok(UInt64)
106    }
107
108    fn invoke_with_args(
109        &self,
110        args: datafusion_expr::ScalarFunctionArgs,
111    ) -> Result<ColumnarValue> {
112        make_scalar_function(cardinality_inner)(&args.args)
113    }
114
115    fn documentation(&self) -> Option<&Documentation> {
116        self.doc()
117    }
118}
119
120/// Cardinality SQL function
121pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
122    let [array] = take_function_args("cardinality", args)?;
123    match array.data_type() {
124        Null => Ok(Arc::new(UInt64Array::from_value(0, array.len()))),
125        List(_) => {
126            let list_array = as_list_array(array)?;
127            generic_list_cardinality::<i32>(list_array)
128        }
129        LargeList(_) => {
130            let list_array = as_large_list_array(array)?;
131            generic_list_cardinality::<i64>(list_array)
132        }
133        Map(_, _) => {
134            let map_array = as_map_array(array)?;
135            generic_map_cardinality(map_array)
136        }
137        arg_type => {
138            exec_err!("cardinality does not support type {arg_type}")
139        }
140    }
141}
142
143fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
144    let result: UInt64Array = array
145        .iter()
146        .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
147        .collect();
148    Ok(Arc::new(result))
149}
150
151fn generic_list_cardinality<O: OffsetSizeTrait>(
152    array: &GenericListArray<O>,
153) -> Result<ArrayRef> {
154    let result = array
155        .iter()
156        .map(|arr| match crate::utils::compute_array_dims(arr)? {
157            Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
158            None => Ok(None),
159        })
160        .collect::<Result<UInt64Array>>()?;
161    Ok(Arc::new(result) as ArrayRef)
162}