datafusion_spark/function/bitmap/
bitmap_count.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::{
22    Array, ArrayRef, BinaryArray, BinaryViewArray, FixedSizeBinaryArray, Int64Array,
23    LargeBinaryArray,
24};
25use arrow::datatypes::DataType;
26use arrow::datatypes::DataType::{
27    Binary, BinaryView, FixedSizeBinary, Int64, LargeBinary,
28};
29use datafusion_common::utils::take_function_args;
30use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result};
31use datafusion_expr::{
32    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
33};
34use datafusion_functions::utils::make_scalar_function;
35use datafusion_functions::{downcast_arg, downcast_named_arg};
36
37#[derive(Debug, PartialEq, Eq, Hash)]
38pub struct BitmapCount {
39    signature: Signature,
40}
41
42impl Default for BitmapCount {
43    fn default() -> Self {
44        Self::new()
45    }
46}
47
48impl BitmapCount {
49    pub fn new() -> Self {
50        Self {
51            // TODO: add definitive TypeSignature after https://github.com/apache/datafusion/issues/17291 is done
52            signature: Signature::any(1, Volatility::Immutable),
53        }
54    }
55}
56
57impl ScalarUDFImpl for BitmapCount {
58    fn as_any(&self) -> &dyn Any {
59        self
60    }
61
62    fn name(&self) -> &str {
63        "bitmap_count"
64    }
65
66    fn signature(&self) -> &Signature {
67        &self.signature
68    }
69
70    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
71        match arg_types.first() {
72            Some(Binary | BinaryView | FixedSizeBinary(_) | LargeBinary) => Ok(Int64),
73            Some(data_type) => plan_err!(
74                "bitmap_count expects Binary/BinaryView/FixedSizeBinary/LargeBinary as argument, got {:?}", 
75                data_type
76            ),
77            None => internal_err!("bitmap_count does not support zero arguments"),
78        }
79    }
80
81    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
82        make_scalar_function(bitmap_count_inner, vec![])(&args.args)
83    }
84}
85
/// Counts the set bits across every byte of `opt`.
///
/// Returns `None` when `opt` is `None`; an empty slice yields `Some(0)`.
fn binary_count_ones(opt: Option<&[u8]>) -> Option<i64> {
    let bytes = opt?;
    let set_bits = bytes
        .iter()
        .fold(0i64, |running, byte| running + i64::from(byte.count_ones()));
    Some(set_bits)
}
89
/// Downcasts `$input_array` to `$array_type` via `downcast_arg!`, maps every
/// element through `binary_count_ones`, and evaluates to `Ok(Int64Array)`.
macro_rules! downcast_and_count_ones {
    ($input_array:expr, $array_type:ident) => {{
        let typed = downcast_arg!($input_array, $array_type);
        let counts: Int64Array = typed.iter().map(binary_count_ones).collect();
        Ok(counts)
    }};
}
96
97pub fn bitmap_count_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
98    let [input_array] = take_function_args("bitmap_count", arg)?;
99
100    let res: Result<Int64Array> = match &input_array.data_type() {
101        Binary => downcast_and_count_ones!(input_array, BinaryArray),
102        BinaryView => downcast_and_count_ones!(input_array, BinaryViewArray),
103        LargeBinary => downcast_and_count_ones!(input_array, LargeBinaryArray),
104        FixedSizeBinary(_size) => {
105            downcast_and_count_ones!(input_array, FixedSizeBinaryArray)
106        }
107        data_type => {
108            internal_err!("bitmap_count does not support {:?}", data_type)
109        }
110    };
111
112    Ok(Arc::new(res?))
113}
114
#[cfg(test)]
mod tests {
    use crate::function::bitmap::bitmap_count::BitmapCount;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, Int64Array};
    use arrow::datatypes::DataType::Int64;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Invokes `bitmap_count` on one scalar argument and compares the result
    /// with `$EXPECTED`, expecting an `Int64` output.
    macro_rules! check_bitmap_count_scalar {
        ($SCALAR:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                BitmapCount::new(),
                vec![ColumnarValue::Scalar($SCALAR)],
                $EXPECTED,
                i64,
                Int64,
                Int64Array
            );
        };
    }

    /// Runs the same input/expectation pair against all four binary scalar
    /// variants: `Binary`, `LargeBinary`, `BinaryView` and `FixedSizeBinary`.
    macro_rules! test_bitmap_count_binary_invoke {
        ($INPUT:expr, $EXPECTED:expr) => {
            check_bitmap_count_scalar!(ScalarValue::Binary($INPUT), $EXPECTED);
            check_bitmap_count_scalar!(ScalarValue::LargeBinary($INPUT), $EXPECTED);
            check_bitmap_count_scalar!(ScalarValue::BinaryView($INPUT), $EXPECTED);
            // The fixed-size width is the input length, or 0 for a null input.
            check_bitmap_count_scalar!(
                ScalarValue::FixedSizeBinary(
                    $INPUT.map(|a| a.len()).unwrap_or(0) as i32,
                    $INPUT
                ),
                $EXPECTED
            );
        };
    }

    #[test]
    fn test_bitmap_count_invoke() -> Result<()> {
        // Null in, null out.
        test_bitmap_count_binary_invoke!(None::<Vec<u8>>, Ok(None));
        // 0x0A = 0b0000_1010 -> 2 bits.
        test_bitmap_count_binary_invoke!(Some(vec![0x0Au8]), Ok(Some(2)));
        // Two fully set bytes -> 16 bits.
        test_bitmap_count_binary_invoke!(Some(vec![0xFFu8, 0xFFu8]), Ok(Some(16)));
        // 2 + 3 + 5 bits.
        test_bitmap_count_binary_invoke!(
            Some(vec![0x0Au8, 0xB0u8, 0xCDu8]),
            Ok(Some(10))
        );
        Ok(())
    }
}