datafusion_functions_nested/
cardinality.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDFImpl`] definitions for cardinality function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25    DataType,
26    DataType::{LargeList, List, Map, Null, UInt64},
27};
28use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
29use datafusion_common::exec_err;
30use datafusion_common::utils::{take_function_args, ListCoercion};
31use datafusion_common::Result;
32use datafusion_expr::{
33    ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
34    ScalarUDFImpl, Signature, TypeSignature, Volatility,
35};
36use datafusion_macros::user_doc;
37use std::any::Any;
38use std::sync::Arc;
39
// Registers the `Cardinality` UDF through the crate's helper macro.
// NOTE(review): `make_udf_expr_and_func!` is defined elsewhere; presumably it
// generates the `cardinality(array)` expression builder and the
// `cardinality_udf()` accessor — confirm against the macro definition.
make_udf_expr_and_func!(
    Cardinality,
    cardinality,
    array,
    "returns the total number of elements in the array or map.",
    cardinality_udf
);
47
48impl Cardinality {
49    pub fn new() -> Self {
50        Self {
51            signature: Signature::one_of(
52                vec![
53                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
54                        arguments: vec![ArrayFunctionArgument::Array],
55                        array_coercion: Some(ListCoercion::FixedSizedListToList),
56                    }),
57                    TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
58                ],
59                Volatility::Immutable,
60            ),
61            aliases: vec![],
62        }
63    }
64}
65
// NOTE(review): the user-facing description below mentions only arrays, while
// `Cardinality::new` also registers a `MapArray` signature — confirm whether
// the generated docs should mention maps as well.
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns the total number of elements in the array.",
    syntax_example = "cardinality(array)",
    sql_example = r#"```sql
> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
+--------------------------------------+
| cardinality(List([1,2,3,4,5,6,7,8])) |
+--------------------------------------+
| 8                                    |
+--------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
/// Scalar UDF implementing the `cardinality` SQL function.
#[derive(Debug)]
pub struct Cardinality {
    // Accepted argument signatures; returned by `ScalarUDFImpl::signature`.
    signature: Signature,
    // Alternative names; returned by `ScalarUDFImpl::aliases` (empty in `new`).
    aliases: Vec<String>,
}
88
89impl Default for Cardinality {
90    fn default() -> Self {
91        Self::new()
92    }
93}
94impl ScalarUDFImpl for Cardinality {
95    fn as_any(&self) -> &dyn Any {
96        self
97    }
98    fn name(&self) -> &str {
99        "cardinality"
100    }
101
102    fn signature(&self) -> &Signature {
103        &self.signature
104    }
105
106    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
107        Ok(UInt64)
108    }
109
110    fn invoke_with_args(
111        &self,
112        args: datafusion_expr::ScalarFunctionArgs,
113    ) -> Result<ColumnarValue> {
114        make_scalar_function(cardinality_inner)(&args.args)
115    }
116
117    fn aliases(&self) -> &[String] {
118        &self.aliases
119    }
120
121    fn documentation(&self) -> Option<&Documentation> {
122        self.doc()
123    }
124}
125
126/// Cardinality SQL function
127pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
128    let [array] = take_function_args("cardinality", args)?;
129    match array.data_type() {
130        Null => Ok(Arc::new(UInt64Array::from_value(0, array.len()))),
131        List(_) => {
132            let list_array = as_list_array(array)?;
133            generic_list_cardinality::<i32>(list_array)
134        }
135        LargeList(_) => {
136            let list_array = as_large_list_array(array)?;
137            generic_list_cardinality::<i64>(list_array)
138        }
139        Map(_, _) => {
140            let map_array = as_map_array(array)?;
141            generic_map_cardinality(map_array)
142        }
143        arg_type => {
144            exec_err!("cardinality does not support type {arg_type}")
145        }
146    }
147}
148
149fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
150    let result: UInt64Array = array
151        .iter()
152        .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
153        .collect();
154    Ok(Arc::new(result))
155}
156
157fn generic_list_cardinality<O: OffsetSizeTrait>(
158    array: &GenericListArray<O>,
159) -> Result<ArrayRef> {
160    let result = array
161        .iter()
162        .map(|arr| match crate::utils::compute_array_dims(arr)? {
163            Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
164            None => Ok(None),
165        })
166        .collect::<Result<UInt64Array>>()?;
167    Ok(Arc::new(result) as ArrayRef)
168}