datafusion_functions_nested/
cardinality.rs1use crate::utils::make_scalar_function;
21use arrow::array::{
22 Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, UInt64Array,
23};
24use arrow::datatypes::{
25 DataType,
26 DataType::{FixedSizeList, LargeList, List, Map, UInt64},
27};
28use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array};
29use datafusion_common::utils::take_function_args;
30use datafusion_common::Result;
31use datafusion_common::{exec_err, plan_err};
32use datafusion_expr::{
33 ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
34 ScalarUDFImpl, Signature, TypeSignature, Volatility,
35};
36use datafusion_macros::user_doc;
37use std::any::Any;
38use std::sync::Arc;
39
// Wires up the `Cardinality` UDF through the crate's `make_udf_expr_and_func!`
// macro, which is defined outside this file.
// NOTE(review): presumably this generates the `cardinality(array)` expression
// helper and the `cardinality_udf` accessor — confirm against the macro definition.
make_udf_expr_and_func!(
    Cardinality,
    cardinality,
    array,
    "returns the total number of elements in the array or map.",
    cardinality_udf
);
47
48impl Cardinality {
49 pub fn new() -> Self {
50 Self {
51 signature: Signature::one_of(
52 vec![
53 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
54 arguments: vec![ArrayFunctionArgument::Array],
55 array_coercion: None,
56 }),
57 TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
58 ],
59 Volatility::Immutable,
60 ),
61 aliases: vec![],
62 }
63 }
64}
65
#[user_doc(
    doc_section(label = "Array Functions"),
    description = "Returns the total number of elements in the array.",
    syntax_example = "cardinality(array)",
    sql_example = r#"```sql
> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]);
+--------------------------------------+
| cardinality(List([1,2,3,4,5,6,7,8])) |
+--------------------------------------+
| 8 |
+--------------------------------------+
```"#,
    argument(
        name = "array",
        description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
    )
)]
#[derive(Debug)]
/// Scalar UDF implementation behind the SQL function `cardinality`.
///
/// The user-facing documentation is declared in the `user_doc` attribute above.
pub struct Cardinality {
    /// Accepted argument signatures, exposed through `ScalarUDFImpl::signature`.
    signature: Signature,
    /// Alternative names, exposed through `ScalarUDFImpl::aliases`
    /// (`Cardinality::new` leaves this empty).
    aliases: Vec<String>,
}
88
89impl Default for Cardinality {
90 fn default() -> Self {
91 Self::new()
92 }
93}
94impl ScalarUDFImpl for Cardinality {
95 fn as_any(&self) -> &dyn Any {
96 self
97 }
98 fn name(&self) -> &str {
99 "cardinality"
100 }
101
102 fn signature(&self) -> &Signature {
103 &self.signature
104 }
105
106 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
107 Ok(match arg_types[0] {
108 List(_) | LargeList(_) | FixedSizeList(_, _) | Map(_, _) => UInt64,
109 _ => {
110 return plan_err!("The cardinality function can only accept List/LargeList/FixedSizeList/Map.");
111 }
112 })
113 }
114
115 fn invoke_with_args(
116 &self,
117 args: datafusion_expr::ScalarFunctionArgs,
118 ) -> Result<ColumnarValue> {
119 make_scalar_function(cardinality_inner)(&args.args)
120 }
121
122 fn aliases(&self) -> &[String] {
123 &self.aliases
124 }
125
126 fn documentation(&self) -> Option<&Documentation> {
127 self.doc()
128 }
129}
130
131pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
133 let [array] = take_function_args("cardinality", args)?;
134 match &array.data_type() {
135 List(_) => {
136 let list_array = as_list_array(&array)?;
137 generic_list_cardinality::<i32>(list_array)
138 }
139 LargeList(_) => {
140 let list_array = as_large_list_array(&array)?;
141 generic_list_cardinality::<i64>(list_array)
142 }
143 Map(_, _) => {
144 let map_array = as_map_array(&array)?;
145 generic_map_cardinality(map_array)
146 }
147 other => {
148 exec_err!("cardinality does not support type '{:?}'", other)
149 }
150 }
151}
152
153fn generic_map_cardinality(array: &MapArray) -> Result<ArrayRef> {
154 let result: UInt64Array = array
155 .iter()
156 .map(|opt_arr| opt_arr.map(|arr| arr.len() as u64))
157 .collect();
158 Ok(Arc::new(result))
159}
160
161fn generic_list_cardinality<O: OffsetSizeTrait>(
162 array: &GenericListArray<O>,
163) -> Result<ArrayRef> {
164 let result = array
165 .iter()
166 .map(|arr| match crate::utils::compute_array_dims(arr)? {
167 Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
168 None => Ok(None),
169 })
170 .collect::<Result<UInt64Array>>()?;
171 Ok(Arc::new(result) as ArrayRef)
172}