Skip to main content

datafusion_functions_nested/
cardinality.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
//! [`ScalarUDFImpl`] definitions for the `cardinality` function.
19
20use crate::utils::make_scalar_function;
21use arrow::array::{
22    Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25    DataType,
26    DataType::{LargeList, List, Map, Null, UInt64},
27};
28use datafusion_common::Result;
29use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
30use datafusion_common::exec_err;
31use datafusion_common::utils::{ListCoercion, take_function_args};
32use datafusion_expr::{
33    ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
34    ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
35};
36use datafusion_macros::user_doc;
37use std::sync::Arc;
38
// Invokes `make_udf_expr_and_func!` for the `Cardinality` UDF. The arguments
// are, in order: the UDF struct, the identifier `cardinality`, the argument
// name `array`, a one-line description string, and the identifier
// `cardinality_udf`.
// NOTE(review): the macro is defined outside this file; presumably it
// generates a `cardinality(array)` expression helper and a `cardinality_udf()`
// accessor — confirm against the macro definition.
39make_udf_expr_and_func!(
40    Cardinality,
41    cardinality,
42    array,
43    "returns the total number of elements in the array or map.",
44    cardinality_udf
45);
46
47impl Cardinality {
48    pub fn new() -> Self {
49        Self {
50            signature: Signature::one_of(
51                vec![
52                    TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
53                        arguments: vec![ArrayFunctionArgument::Array],
54                        array_coercion: Some(ListCoercion::FixedSizedListToList),
55                    }),
56                    TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
57                ],
58                Volatility::Immutable,
59            ),
60        }
61    }
62}
63
/// Scalar UDF type for the `cardinality` function.
///
/// The `user_doc` attribute below carries the user-facing documentation:
/// doc section, description, syntax example, SQL example and the description
/// of the single `array` argument.
64#[user_doc(
65    doc_section(label = "Array Functions"),
66    description = "Returns the total number of elements in the array.",
67    syntax_example = "cardinality(array)",
68    sql_example = r#"```sql
69> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
70+--------------------------------------+
71| cardinality(List([1,2,3,4,5,6,7,8])) |
72+--------------------------------------+
73| 8                                    |
74+--------------------------------------+
75```"#,
76    argument(
77        name = "array",
78        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
79    )
80)]
81#[derive(Debug, PartialEq, Eq, Hash)]
82pub struct Cardinality {
    // Accepted argument signature; built in `Cardinality::new` and handed out
    // by the `signature()` trait method.
83    signature: Signature,
84}
85
86impl Default for Cardinality {
87    fn default() -> Self {
88        Self::new()
89    }
90}
91impl ScalarUDFImpl for Cardinality {
92    fn name(&self) -> &str {
93        "cardinality"
94    }
95
96    fn signature(&self) -> &Signature {
97        &self.signature
98    }
99
100    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
101        Ok(UInt64)
102    }
103
104    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
105        make_scalar_function(cardinality_inner)(&args.args)
106    }
107
108    fn documentation(&self) -> Option<&Documentation> {
109        self.doc()
110    }
111}
112
113fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
114    let [array] = take_function_args("cardinality", args)?;
115    match array.data_type() {
116        Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
117        List(_) => {
118            let list_array = as_list_array(array)?;
119            generic_list_cardinality::<i32>(list_array)
120        }
121        LargeList(_) => {
122            let list_array = as_large_list_array(array)?;
123            generic_list_cardinality::<i64>(list_array)
124        }
125        Map(_, _) => {
126            let map_array = as_map_array(array)?;
127            generic_map_cardinality(map_array)
128        }
129        arg_type => {
130            exec_err!("cardinality does not support type {arg_type}")
131        }
132    }
133}
134
135fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
136    let result: UInt64Array = array
137        .iter()
138        .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
139        .collect();
140    Ok(Arc::new(result))
141}
142
143fn generic_list_cardinality<O: OffsetSizeTrait>(
144    array: &GenericListArray<O>,
145) -> Result<ArrayRef> {
146    let result = array
147        .iter()
148        .map(|arr| match arr {
149            Some(arr) if arr.is_empty() => Ok(Some(0u64)),
150            arr => match crate::utils::compute_array_dims(arr)? {
151                Some(vector) => {
152                    Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>()))
153                }
154                None => Ok(None),
155            },
156        })
157        .collect::<Result<UInt64Array>>()?;
158    Ok(Arc::new(result) as ArrayRef)
159}