Skip to main content

chematic_fp/
bitvec.rs

1//! Fixed-size 2048-bit bitvector backed by 32 × u64 words.
2
3/// A 2048-bit bitvector.
4///
5/// The vector is stored as 32 little-endian 64-bit words.
6/// Bit `n` resides in word `n / 64`, at position `n % 64`.
7#[derive(Debug, Clone, PartialEq, Eq)]
8pub struct BitVec2048 {
9    words: [u64; 32],
10}
11
12impl Default for BitVec2048 {
13    fn default() -> Self {
14        Self::new()
15    }
16}
17
18impl BitVec2048 {
19    /// Create a new all-zero bitvector.
20    pub fn new() -> Self {
21        Self { words: [0u64; 32] }
22    }
23
24    /// Set bit `bit` to 1.
25    ///
26    /// # Panics
27    /// Panics if `bit >= 2048`.
28    pub fn set(&mut self, bit: usize) {
29        assert!(bit < 2048, "bit index {bit} out of range for BitVec2048 (max 2047)");
30        self.words[bit / 64] |= 1u64 << (bit % 64);
31    }
32
33    /// Return the value of bit `bit`.
34    ///
35    /// # Panics
36    /// Panics if `bit >= 2048`.
37    pub fn get(&self, bit: usize) -> bool {
38        assert!(bit < 2048, "bit index {bit} out of range for BitVec2048 (max 2047)");
39        (self.words[bit / 64] >> (bit % 64)) & 1 == 1
40    }
41
42    /// Count the number of bits set to 1 (popcount / Hamming weight).
43    pub fn popcount(&self) -> u32 {
44        self.words.iter().map(|w| w.count_ones()).sum()
45    }
46
47    /// Bitwise AND of two bitvectors.
48    pub fn and(&self, other: &Self) -> Self {
49        let mut result = Self::new();
50        for (out, (a, b)) in result
51            .words
52            .iter_mut()
53            .zip(self.words.iter().zip(other.words.iter()))
54        {
55            *out = a & b;
56        }
57        result
58    }
59
60    /// Bitwise OR of two bitvectors.
61    pub fn or(&self, other: &Self) -> Self {
62        let mut result = Self::new();
63        for (out, (a, b)) in result
64            .words
65            .iter_mut()
66            .zip(self.words.iter().zip(other.words.iter()))
67        {
68            *out = a | b;
69        }
70        result
71    }
72
73    /// Tanimoto similarity: `|A & B| / |A | B|`.
74    ///
75    /// Returns `1.0` when both vectors are all-zero (the empty-set convention).
76    pub fn tanimoto(&self, other: &Self) -> f64 {
77        let intersection = self.and(other).popcount() as f64;
78        let a = self.popcount() as f64;
79        let b = other.popcount() as f64;
80        let union = a + b - intersection;
81        if union == 0.0 {
82            1.0
83        } else {
84            intersection / union
85        }
86    }
87
88    /// Dice similarity: `2|A & B| / (|A| + |B|)`.
89    ///
90    /// Returns `1.0` when both vectors are all-zero.
91    pub fn dice(&self, other: &Self) -> f64 {
92        let intersection = self.and(other).popcount() as f64;
93        let a = self.popcount() as f64;
94        let b = other.popcount() as f64;
95        let denom = a + b;
96        if denom == 0.0 {
97            1.0
98        } else {
99            2.0 * intersection / denom
100        }
101    }
102
103    /// XOR-fold this 2048-bit vector to `bits` bits.
104    ///
105    /// The upper half is XOR-folded into the lower half, and the upper half is zeroed.
106    /// This is applied recursively until the target width is reached.
107    ///
108    /// `bits` must be 1024, 512, or 256.
109    ///
110    /// # Panics
111    /// Panics if `bits` is not one of 1024, 512, or 256.
112    pub fn fold(&self, bits: usize) -> Self {
113        assert!(
114            matches!(bits, 256 | 512 | 1024),
115            "fold target must be 1024, 512, or 256; got {bits}"
116        );
117
118        let mut current = self.clone();
119        let mut current_bits = 2048usize;
120
121        while current_bits > bits {
122            let half_words = current_bits / 64 / 2; // number of 64-bit words in half
123            let mut folded = Self::new();
124            for i in 0..half_words {
125                folded.words[i] = current.words[i] ^ current.words[i + half_words];
126            }
127            current = folded;
128            current_bits /= 2;
129        }
130
131        current
132    }
133}
134
135#[cfg(test)]
136mod tests {
137    use super::*;
138
139    #[test]
140    fn new_bitvec_is_all_zero() {
141        let bv = BitVec2048::new();
142        assert_eq!(bv.popcount(), 0, "new bitvec must have popcount 0");
143    }
144
145    #[test]
146    fn set_and_get_boundary_bits() {
147        let mut bv = BitVec2048::new();
148        bv.set(0);
149        bv.set(2047);
150        assert_eq!(bv.popcount(), 2);
151        assert!(bv.get(0));
152        assert!(!bv.get(1));
153        assert!(bv.get(2047));
154    }
155
156    #[test]
157    fn tanimoto_identical_nonzero() {
158        let mut bv = BitVec2048::new();
159        bv.set(42);
160        bv.set(100);
161        assert_eq!(bv.tanimoto(&bv.clone()), 1.0);
162    }
163
164    #[test]
165    fn tanimoto_disjoint_is_zero() {
166        let mut a = BitVec2048::new();
167        a.set(10);
168        let mut b = BitVec2048::new();
169        b.set(20);
170        assert_eq!(a.tanimoto(&b), 0.0);
171    }
172
173    #[test]
174    fn dice_identical_nonzero() {
175        let mut bv = BitVec2048::new();
176        bv.set(7);
177        assert_eq!(bv.dice(&bv.clone()), 1.0);
178    }
179
180    #[test]
181    fn fold_1024_popcount_leq_original() {
182        // XOR-fold can only reduce or maintain the popcount.
183        let mut bv = BitVec2048::new();
184        for i in (0..2048).step_by(3) {
185            bv.set(i);
186        }
187        let folded = bv.fold(1024);
188        assert!(
189            folded.popcount() <= bv.popcount(),
190            "folded popcount ({}) must be <= original ({})",
191            folded.popcount(),
192            bv.popcount()
193        );
194    }
195
196    #[test]
197    fn and_or_basic_correctness() {
198        let mut a = BitVec2048::new();
199        a.set(5);
200        a.set(10);
201
202        let mut b = BitVec2048::new();
203        b.set(10);
204        b.set(15);
205
206        let and = a.and(&b);
207        assert!(and.get(10), "AND: shared bit 10 should be set");
208        assert!(!and.get(5), "AND: bit 5 only in A should be clear");
209        assert!(!and.get(15), "AND: bit 15 only in B should be clear");
210
211        let or = a.or(&b);
212        assert!(or.get(5), "OR: bit 5 should be set");
213        assert!(or.get(10), "OR: bit 10 should be set");
214        assert!(or.get(15), "OR: bit 15 should be set");
215        assert!(!or.get(0), "OR: bit 0 should be clear");
216    }
217}