datafusion_functions_nested/
cardinality.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25 DataType,
26 DataType::{LargeList, List, Map, Null, UInt64},
27};
28use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
29use datafusion_common::exec_err;
30use datafusion_common::utils::{take_function_args, ListCoercion};
31use datafusion_common::Result;
32use datafusion_expr::{
33 ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
34 ScalarUDFImpl, Signature, TypeSignature, Volatility,
35};
36use datafusion_macros::user_doc;
37use std::any::Any;
38use std::sync::Arc;
39
40make_udf_expr_and_func!(
41 Cardinality,
42 cardinality,
43 array,
44 "returns the total number of elements in the array or map.",
45 cardinality_udf
46);
47
48impl Cardinality {
49 pub fn new() -> Self {
50 Self {
51 signature: Signature::one_of(
52 vec![
53 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
54 arguments: vec![ArrayFunctionArgument::Array],
55 array_coercion: Some(ListCoercion::FixedSizedListToList),
56 }),
57 TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
58 ],
59 Volatility::Immutable,
60 ),
61 }
62 }
63}
64
65#[user_doc(
66 doc_section(label = "Array Functions"),
67 description = "Returns the total number of elements in the array.",
68 syntax_example = "cardinality(array)",
69 sql_example = r#"```sql
70> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
71+--------------------------------------+
72| cardinality(List([1,2,3,4,5,6,7,8])) |
73+--------------------------------------+
74| 8 |
75+--------------------------------------+
76```"#,
77 argument(
78 name = "array",
79 description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
80 )
81)]
82#[derive(Debug, PartialEq, Eq, Hash)]
83pub struct Cardinality {
84 signature: Signature,
85}
86
87impl Default for Cardinality {
88 fn default() -> Self {
89 Self::new()
90 }
91}
92impl ScalarUDFImpl for Cardinality {
93 fn as_any(&self) -> &dyn Any {
94 self
95 }
96 fn name(&self) -> &str {
97 "cardinality"
98 }
99
100 fn signature(&self) -> &Signature {
101 &self.signature
102 }
103
104 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
105 Ok(UInt64)
106 }
107
108 fn invoke_with_args(
109 &self,
110 args: datafusion_expr::ScalarFunctionArgs,
111 ) -> Result<ColumnarValue> {
112 make_scalar_function(cardinality_inner)(&args.args)
113 }
114
115 fn documentation(&self) -> Option<&Documentation> {
116 self.doc()
117 }
118}
119
120pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
122 let [array] = take_function_args("cardinality", args)?;
123 match array.data_type() {
124 Null => Ok(Arc::new(UInt64Array::from_value(0, array.len()))),
125 List(_) => {
126 let list_array = as_list_array(array)?;
127 generic_list_cardinality::<i32>(list_array)
128 }
129 LargeList(_) => {
130 let list_array = as_large_list_array(array)?;
131 generic_list_cardinality::<i64>(list_array)
132 }
133 Map(_, _) => {
134 let map_array = as_map_array(array)?;
135 generic_map_cardinality(map_array)
136 }
137 arg_type => {
138 exec_err!("cardinality does not support type {arg_type}")
139 }
140 }
141}
142
143fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
144 let result: UInt64Array = array
145 .iter()
146 .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
147 .collect();
148 Ok(Arc::new(result))
149}
150
151fn generic_list_cardinality<O: OffsetSizeTrait>(
152 array: &GenericListArray<O>,
153) -> Result<ArrayRef> {
154 let result = array
155 .iter()
156 .map(|arr| match crate::utils::compute_array_dims(arr)? {
157 Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
158 None => Ok(None),
159 })
160 .collect::<Result<UInt64Array>>()?;
161 Ok(Arc::new(result) as ArrayRef)
162}