datafusion_functions_nested/
cardinality.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25 DataType,
26 DataType::{LargeList, List, Map, Null, UInt64},
27};
28use datafusion_common::Result;
29use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
30use datafusion_common::exec_err;
31use datafusion_common::utils::{ListCoercion, take_function_args};
32use datafusion_expr::{
33 ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
34 ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
35};
36use datafusion_macros::user_doc;
37use std::sync::Arc;
38
// Declares the `cardinality` function for the `Cardinality` UDF type through
// the crate's `make_udf_expr_and_func!` macro.
// NOTE(review): the macro definition is not visible here; presumably it
// generates the `cardinality(array)` expression helper (documented with the
// string below) and the `cardinality_udf` accessor — confirm against the macro.
make_udf_expr_and_func!(
    Cardinality,
    cardinality,
    array,
    "returns the total number of elements in the array or map.",
    cardinality_udf
);
46
47impl Cardinality {
48 pub fn new() -> Self {
49 Self {
50 signature: Signature::one_of(
51 vec![
52 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
53 arguments: vec![ArrayFunctionArgument::Array],
54 array_coercion: Some(ListCoercion::FixedSizedListToList),
55 }),
56 TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
57 ],
58 Volatility::Immutable,
59 ),
60 }
61 }
62}
63
64#[user_doc(
65 doc_section(label = "Array Functions"),
66 description = "Returns the total number of elements in the array.",
67 syntax_example = "cardinality(array)",
68 sql_example = r#"```sql
69> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
70+--------------------------------------+
71| cardinality(List([1,2,3,4,5,6,7,8])) |
72+--------------------------------------+
73| 8 |
74+--------------------------------------+
75```"#,
76 argument(
77 name = "array",
78 description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
79 )
80)]
81#[derive(Debug, PartialEq, Eq, Hash)]
82pub struct Cardinality {
83 signature: Signature,
84}
85
86impl Default for Cardinality {
87 fn default() -> Self {
88 Self::new()
89 }
90}
91impl ScalarUDFImpl for Cardinality {
92 fn name(&self) -> &str {
93 "cardinality"
94 }
95
96 fn signature(&self) -> &Signature {
97 &self.signature
98 }
99
100 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
101 Ok(UInt64)
102 }
103
104 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
105 make_scalar_function(cardinality_inner)(&args.args)
106 }
107
108 fn documentation(&self) -> Option<&Documentation> {
109 self.doc()
110 }
111}
112
113fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
114 let [array] = take_function_args("cardinality", args)?;
115 match array.data_type() {
116 Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
117 List(_) => {
118 let list_array = as_list_array(array)?;
119 generic_list_cardinality::<i32>(list_array)
120 }
121 LargeList(_) => {
122 let list_array = as_large_list_array(array)?;
123 generic_list_cardinality::<i64>(list_array)
124 }
125 Map(_, _) => {
126 let map_array = as_map_array(array)?;
127 generic_map_cardinality(map_array)
128 }
129 arg_type => {
130 exec_err!("cardinality does not support type {arg_type}")
131 }
132 }
133}
134
135fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
136 let result: UInt64Array = array
137 .iter()
138 .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
139 .collect();
140 Ok(Arc::new(result))
141}
142
143fn generic_list_cardinality<O: OffsetSizeTrait>(
144 array: &GenericListArray<O>,
145) -> Result<ArrayRef> {
146 let result = array
147 .iter()
148 .map(|arr| match arr {
149 Some(arr) if arr.is_empty() => Ok(Some(0u64)),
150 arr => match crate::utils::compute_array_dims(arr)? {
151 Some(vector) => {
152 Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>()))
153 }
154 None => Ok(None),
155 },
156 })
157 .collect::<Result<UInt64Array>>()?;
158 Ok(Arc::new(result) as ArrayRef)
159}