datafusion_spark/function/bitmap/
bitmap_count.rs1use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::{
22 Array, ArrayRef, BinaryArray, BinaryViewArray, FixedSizeBinaryArray, Int64Array,
23 LargeBinaryArray,
24};
25use arrow::datatypes::DataType;
26use arrow::datatypes::DataType::{
27 Binary, BinaryView, FixedSizeBinary, Int64, LargeBinary,
28};
29use datafusion_common::utils::take_function_args;
30use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result};
31use datafusion_expr::{
32 ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
33};
34use datafusion_functions::utils::make_scalar_function;
35use datafusion_functions::{downcast_arg, downcast_named_arg};
36
37#[derive(Debug, PartialEq, Eq, Hash)]
38pub struct BitmapCount {
39 signature: Signature,
40}
41
42impl Default for BitmapCount {
43 fn default() -> Self {
44 Self::new()
45 }
46}
47
48impl BitmapCount {
49 pub fn new() -> Self {
50 Self {
51 signature: Signature::any(1, Volatility::Immutable),
53 }
54 }
55}
56
57impl ScalarUDFImpl for BitmapCount {
58 fn as_any(&self) -> &dyn Any {
59 self
60 }
61
62 fn name(&self) -> &str {
63 "bitmap_count"
64 }
65
66 fn signature(&self) -> &Signature {
67 &self.signature
68 }
69
70 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
71 match arg_types.first() {
72 Some(Binary | BinaryView | FixedSizeBinary(_) | LargeBinary) => Ok(Int64),
73 Some(data_type) => plan_err!(
74 "bitmap_count expects Binary/BinaryView/FixedSizeBinary/LargeBinary as argument, got {:?}",
75 data_type
76 ),
77 None => internal_err!("bitmap_count does not support zero arguments"),
78 }
79 }
80
81 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
82 make_scalar_function(bitmap_count_inner, vec![])(&args.args)
83 }
84}
85
/// Counts the set bits across every byte of an optional byte slice.
///
/// Returns `None` for a `None` input, otherwise `Some(total)` where `total` is
/// the sum of `count_ones()` over all bytes (so an empty slice gives `Some(0)`).
fn binary_count_ones(opt: Option<&[u8]>) -> Option<i64> {
    let bytes = opt?;
    let total = bytes
        .iter()
        .fold(0i64, |acc, byte| acc + i64::from(byte.count_ones()));
    Some(total)
}
89
/// Downcasts `$input_array` to the concrete `$array_type` (via `downcast_arg!`)
/// and maps every element through `binary_count_ones`, evaluating to
/// `Ok(Int64Array)`.
macro_rules! downcast_and_count_ones {
    ($input_array:expr, $array_type:ident) => {{
        let typed = downcast_arg!($input_array, $array_type);
        let counts: Int64Array = typed.iter().map(binary_count_ones).collect();
        Ok(counts)
    }};
}
96
97pub fn bitmap_count_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
98 let [input_array] = take_function_args("bitmap_count", arg)?;
99
100 let res: Result<Int64Array> = match &input_array.data_type() {
101 Binary => downcast_and_count_ones!(input_array, BinaryArray),
102 BinaryView => downcast_and_count_ones!(input_array, BinaryViewArray),
103 LargeBinary => downcast_and_count_ones!(input_array, LargeBinaryArray),
104 FixedSizeBinary(_size) => {
105 downcast_and_count_ones!(input_array, FixedSizeBinaryArray)
106 }
107 data_type => {
108 internal_err!("bitmap_count does not support {:?}", data_type)
109 }
110 };
111
112 Ok(Arc::new(res?))
113}
114
#[cfg(test)]
mod tests {
    use crate::function::bitmap::bitmap_count::BitmapCount;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, Int64Array};
    use arrow::datatypes::DataType::Int64;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Runs `BitmapCount` on the same optional byte vector wrapped as each of
    /// the four binary scalar variants (`Binary`, `LargeBinary`, `BinaryView`,
    /// `FixedSizeBinary`) and checks every result against `$EXPECTED`.
    macro_rules! test_bitmap_count_binary_invoke {
        ($INPUT:expr, $EXPECTED:expr) => {{
            let input: Option<Vec<u8>> = $INPUT;
            // FixedSizeBinary needs an explicit width: the payload length, or 0 for NULL.
            let width = input.as_ref().map_or(0, |bytes| bytes.len()) as i32;

            test_scalar_function!(
                BitmapCount::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Binary(input.clone()))],
                $EXPECTED,
                i64,
                Int64,
                Int64Array
            );

            test_scalar_function!(
                BitmapCount::new(),
                vec![ColumnarValue::Scalar(ScalarValue::LargeBinary(input.clone()))],
                $EXPECTED,
                i64,
                Int64,
                Int64Array
            );

            test_scalar_function!(
                BitmapCount::new(),
                vec![ColumnarValue::Scalar(ScalarValue::BinaryView(input.clone()))],
                $EXPECTED,
                i64,
                Int64,
                Int64Array
            );

            test_scalar_function!(
                BitmapCount::new(),
                vec![ColumnarValue::Scalar(ScalarValue::FixedSizeBinary(width, input))],
                $EXPECTED,
                i64,
                Int64,
                Int64Array
            );
        }};
    }

    /// Covers a NULL input plus one-, two- and three-byte bitmaps.
    #[test]
    fn test_bitmap_count_invoke() -> Result<()> {
        test_bitmap_count_binary_invoke!(None::<Vec<u8>>, Ok(None));
        test_bitmap_count_binary_invoke!(Some(vec![0x0Au8]), Ok(Some(2)));
        test_bitmap_count_binary_invoke!(Some(vec![0xFFu8, 0xFFu8]), Ok(Some(16)));
        test_bitmap_count_binary_invoke!(Some(vec![0x0Au8, 0xB0u8, 0xCDu8]), Ok(Some(10)));
        Ok(())
    }
}