Skip to main content

vector/serde/
vector_bitmap.rs

1//! VectorBitmap - shared bitmap type for storing sets of vector IDs.
2//!
3//! This type is used by both `DeletionsValue` and `MetadataIndexValue` to
4//! store compressed sets of vector IDs using RoaringTreemap.
5
6use super::EncodingError;
7use bytes::Bytes;
8use roaring::RoaringTreemap;
9use std::io::Cursor;
10
11/// A compressed bitmap storing a set of vector IDs.
12///
13/// Uses `RoaringTreemap` for efficient compression and fast set operations.
14///
15/// ## Value Layout
16///
17/// ```text
18/// +----------------------------------------------------------------+
19/// |  vector_ids:  RoaringTreemap serialization                     |
20/// |               (compressed u64 bitmap, variable length)         |
21/// +----------------------------------------------------------------+
22/// ```
23#[derive(Debug, Clone)]
24pub struct VectorBitmap {
25    /// Compressed set of vector IDs.
26    pub vector_ids: RoaringTreemap,
27}
28
29impl VectorBitmap {
30    pub fn new() -> Self {
31        Self {
32            vector_ids: RoaringTreemap::new(),
33        }
34    }
35
36    pub fn from_treemap(vector_ids: RoaringTreemap) -> Self {
37        Self { vector_ids }
38    }
39
40    /// Create a VectorBitmap containing a single vector ID.
41    pub fn singleton(vector_id: u64) -> Self {
42        let mut treemap = RoaringTreemap::new();
43        treemap.insert(vector_id);
44        Self {
45            vector_ids: treemap,
46        }
47    }
48
49    /// Insert a vector ID into the bitmap.
50    pub fn insert(&mut self, vector_id: u64) -> bool {
51        self.vector_ids.insert(vector_id)
52    }
53
54    /// Remove a vector ID from the bitmap.
55    pub fn remove(&mut self, vector_id: u64) -> bool {
56        self.vector_ids.remove(vector_id)
57    }
58
59    /// Check if a vector ID is in the bitmap.
60    pub fn contains(&self, vector_id: u64) -> bool {
61        self.vector_ids.contains(vector_id)
62    }
63
64    /// Returns the number of vector IDs in the bitmap.
65    pub fn len(&self) -> u64 {
66        self.vector_ids.len()
67    }
68
69    /// Returns true if the bitmap is empty.
70    pub fn is_empty(&self) -> bool {
71        self.vector_ids.is_empty()
72    }
73
74    /// Returns an iterator over the vector IDs.
75    pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
76        self.vector_ids.iter()
77    }
78
79    /// Union (OR) this bitmap with another.
80    pub fn union_with(&mut self, other: &Self) {
81        self.vector_ids |= &other.vector_ids;
82    }
83
84    /// Difference (AND-NOT) this bitmap with another.
85    pub fn difference_with(&mut self, other: &Self) {
86        self.vector_ids -= &other.vector_ids;
87    }
88
89    /// Intersection (AND) this bitmap with another.
90    pub fn intersect_with(&mut self, other: &Self) {
91        self.vector_ids &= &other.vector_ids;
92    }
93
94    pub fn encode_to_bytes(&self) -> Result<Bytes, EncodingError> {
95        let mut buf = Vec::new();
96        self.vector_ids
97            .serialize_into(&mut buf)
98            .map_err(|e| EncodingError {
99                message: format!("Failed to serialize RoaringTreemap: {}", e),
100            })?;
101        Ok(Bytes::from(buf))
102    }
103
104    pub fn decode_from_bytes(buf: &[u8]) -> Result<Self, EncodingError> {
105        let cursor = Cursor::new(buf);
106        let vector_ids = RoaringTreemap::deserialize_from(cursor).map_err(|e| EncodingError {
107            message: format!("Failed to deserialize RoaringTreemap: {}", e),
108        })?;
109        Ok(VectorBitmap { vector_ids })
110    }
111}
112
113impl Default for VectorBitmap {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119impl PartialEq for VectorBitmap {
120    fn eq(&self, other: &Self) -> bool {
121        self.vector_ids == other.vector_ids
122    }
123}
124
125#[cfg(test)]
126mod tests {
127    use super::*;
128
129    #[test]
130    fn should_encode_and_decode_empty_bitmap() {
131        // given
132        let bitmap = VectorBitmap::new();
133
134        // when
135        let encoded = bitmap.encode_to_bytes().unwrap();
136        let decoded = VectorBitmap::decode_from_bytes(&encoded).unwrap();
137
138        // then
139        assert!(decoded.is_empty());
140    }
141
142    #[test]
143    fn should_encode_and_decode_bitmap_with_ids() {
144        // given
145        let mut bitmap = VectorBitmap::new();
146        bitmap.insert(1);
147        bitmap.insert(100);
148        bitmap.insert(10000);
149        bitmap.insert(u64::MAX);
150
151        // when
152        let encoded = bitmap.encode_to_bytes().unwrap();
153        let decoded = VectorBitmap::decode_from_bytes(&encoded).unwrap();
154
155        // then
156        assert_eq!(decoded.len(), 4);
157        assert!(decoded.contains(1));
158        assert!(decoded.contains(100));
159        assert!(decoded.contains(10000));
160        assert!(decoded.contains(u64::MAX));
161    }
162
163    #[test]
164    fn should_create_singleton() {
165        // given / when
166        let bitmap = VectorBitmap::singleton(42);
167
168        // then
169        assert_eq!(bitmap.len(), 1);
170        assert!(bitmap.contains(42));
171    }
172
173    #[test]
174    fn should_perform_union() {
175        // given
176        let mut b1 = VectorBitmap::new();
177        b1.insert(1);
178        b1.insert(2);
179
180        let mut b2 = VectorBitmap::new();
181        b2.insert(2);
182        b2.insert(3);
183
184        // when
185        b1.union_with(&b2);
186
187        // then
188        assert_eq!(b1.len(), 3);
189        assert!(b1.contains(1));
190        assert!(b1.contains(2));
191        assert!(b1.contains(3));
192    }
193
194    #[test]
195    fn should_perform_difference() {
196        // given
197        let mut b1 = VectorBitmap::new();
198        b1.insert(1);
199        b1.insert(2);
200        b1.insert(3);
201
202        let mut b2 = VectorBitmap::new();
203        b2.insert(2);
204
205        // when
206        b1.difference_with(&b2);
207
208        // then
209        assert_eq!(b1.len(), 2);
210        assert!(b1.contains(1));
211        assert!(!b1.contains(2));
212        assert!(b1.contains(3));
213    }
214
215    #[test]
216    fn should_perform_intersection() {
217        // given
218        let mut b1 = VectorBitmap::new();
219        b1.insert(1);
220        b1.insert(2);
221        b1.insert(3);
222
223        let mut b2 = VectorBitmap::new();
224        b2.insert(2);
225        b2.insert(3);
226        b2.insert(4);
227
228        // when
229        b1.intersect_with(&b2);
230
231        // then
232        assert_eq!(b1.len(), 2);
233        assert!(!b1.contains(1));
234        assert!(b1.contains(2));
235        assert!(b1.contains(3));
236        assert!(!b1.contains(4));
237    }
238
239    #[test]
240    fn should_iterate_over_ids() {
241        // given
242        let mut bitmap = VectorBitmap::new();
243        bitmap.insert(3);
244        bitmap.insert(1);
245        bitmap.insert(2);
246
247        // when
248        let ids: Vec<u64> = bitmap.iter().collect();
249
250        // then
251        assert_eq!(ids, vec![1, 2, 3]); // Sorted order
252    }
253}