rocketmq_filter/utils/
bloom_filter.rs

1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17use crate::utils::bits_array::BitsArray;
18use crate::utils::bloom_filter_data::BloomFilterData;
19
20#[derive(Clone, Copy)]
21pub struct BloomFilter {
22    // as error rate, 10/100 = 0.1
23    f: i32,
24    n: i32,
25    // hash function num, by calculation.
26    k: i32,
27    // bit count, by calculation.
28    m: i32,
29}
30
31impl Default for BloomFilter {
32    fn default() -> Self {
33        BloomFilter {
34            f: 10,
35            n: 128,
36            k: 0,
37            m: 0,
38        }
39    }
40}
41
42#[allow(unused_mut)]
43#[allow(unused_variables)]
44impl BloomFilter {
45    pub fn new(f: i32, n: i32) -> Result<Self, &'static str> {
46        if !(1..100).contains(&f) {
47            return Err("f must be greater or equal than 1 and less than 100");
48        }
49        if n < 1 {
50            return Err("n must be greater than 0");
51        }
52
53        let error_rate = f as f64 / 100.0;
54        let k =
55            (0.5f64.log2() * (n as f64 * error_rate.ln()).abs() / error_rate.ln()).ceil() as i32;
56
57        if k < 1 {
58            return Err(
59                "Hash function num is less than 1, maybe you should change the value of error \
60                 rate or bit num!",
61            );
62        }
63
64        let m = (n as f64 * error_rate.ln().abs() / (2f64.ln() * 2f64.ln())).ceil() as i32;
65        let m = 8 * ((m + 7) / 8); // Ensure m is a multiple of 8
66
67        Ok(BloomFilter { f, n, k, m })
68    }
69
70    pub fn f(&self) -> i32 {
71        self.f
72    }
73
74    pub fn n(&self) -> i32 {
75        self.n
76    }
77
78    pub fn k(&self) -> i32 {
79        self.k
80    }
81
82    pub fn m(&self) -> i32 {
83        self.m
84    }
85
86    pub fn is_valid(&self, filter_data: Option<&BloomFilterData>) -> bool {
87        match filter_data {
88            Some(data) => {
89                data.bit_num() == self.m as u32 && data.bit_pos().len() == self.k as usize
90            }
91            None => false,
92        }
93    }
94
95    pub fn hash_to(&self, filter_data: &BloomFilterData, bits: &mut BitsArray) {
96        /* if !self.is_valid(filter_data) {
97            panic!(
98                "Bloom filter data may not belong to this filter! {:?}, {:?}",
99                filter_data, self
100            );
101        }
102        self.hash_to_positions(filter_data.bit_pos(), bits);*/
103        unimplemented!("hash_to");
104    }
105
106    // Helper method for setting bits at given positions
107    pub fn hash_to_positions(&self, bit_positions: &[usize], bits: &mut BitsArray) {
108        /*self.check(bits);
109        for &i in bit_positions {
110            bits.set_bit(i, true);
111        }*/
112        unimplemented!("hash_to_positions");
113    }
114}