ruvector_graph/optimization/
index_compression.rs1use parking_lot::RwLock;
9use roaring::RoaringBitmap;
10use std::collections::HashMap;
11use std::sync::Arc;
12
13pub struct CompressedIndex {
15 label_indexes: Arc<RwLock<HashMap<String, RoaringBitmap>>>,
17 sorted_indexes: Arc<RwLock<HashMap<String, DeltaEncodedList>>>,
19 string_dict: Arc<RwLock<StringDictionary>>,
21}
22
23impl CompressedIndex {
24 pub fn new() -> Self {
25 Self {
26 label_indexes: Arc::new(RwLock::new(HashMap::new())),
27 sorted_indexes: Arc::new(RwLock::new(HashMap::new())),
28 string_dict: Arc::new(RwLock::new(StringDictionary::new())),
29 }
30 }
31
32 pub fn add_to_label_index(&self, label: &str, node_id: u64) {
34 let mut indexes = self.label_indexes.write();
35 indexes
36 .entry(label.to_string())
37 .or_insert_with(RoaringBitmap::new)
38 .insert(node_id as u32);
39 }
40
41 pub fn get_nodes_by_label(&self, label: &str) -> Vec<u64> {
43 self.label_indexes
44 .read()
45 .get(label)
46 .map(|bitmap| bitmap.iter().map(|id| id as u64).collect())
47 .unwrap_or_default()
48 }
49
50 pub fn has_label(&self, label: &str, node_id: u64) -> bool {
52 self.label_indexes
53 .read()
54 .get(label)
55 .map(|bitmap| bitmap.contains(node_id as u32))
56 .unwrap_or(false)
57 }
58
59 pub fn count_label(&self, label: &str) -> u64 {
61 self.label_indexes
62 .read()
63 .get(label)
64 .map(|bitmap| bitmap.len())
65 .unwrap_or(0)
66 }
67
68 pub fn intersect_labels(&self, labels: &[&str]) -> Vec<u64> {
70 let indexes = self.label_indexes.read();
71
72 if labels.is_empty() {
73 return Vec::new();
74 }
75
76 let mut result = indexes
77 .get(labels[0])
78 .cloned()
79 .unwrap_or_else(RoaringBitmap::new);
80
81 for &label in &labels[1..] {
82 if let Some(bitmap) = indexes.get(label) {
83 result &= bitmap;
84 } else {
85 return Vec::new();
86 }
87 }
88
89 result.iter().map(|id| id as u64).collect()
90 }
91
92 pub fn union_labels(&self, labels: &[&str]) -> Vec<u64> {
94 let indexes = self.label_indexes.read();
95 let mut result = RoaringBitmap::new();
96
97 for &label in labels {
98 if let Some(bitmap) = indexes.get(label) {
99 result |= bitmap;
100 }
101 }
102
103 result.iter().map(|id| id as u64).collect()
104 }
105
106 pub fn encode_string(&self, s: &str) -> u32 {
108 self.string_dict.write().encode(s)
109 }
110
111 pub fn decode_string(&self, id: u32) -> Option<String> {
113 self.string_dict.read().decode(id)
114 }
115}
116
117impl Default for CompressedIndex {
118 fn default() -> Self {
119 Self::new()
120 }
121}
122
123pub struct RoaringBitmapIndex {
125 bitmap: RoaringBitmap,
126}
127
128impl RoaringBitmapIndex {
129 pub fn new() -> Self {
130 Self {
131 bitmap: RoaringBitmap::new(),
132 }
133 }
134
135 pub fn insert(&mut self, id: u64) {
136 self.bitmap.insert(id as u32);
137 }
138
139 pub fn contains(&self, id: u64) -> bool {
140 self.bitmap.contains(id as u32)
141 }
142
143 pub fn remove(&mut self, id: u64) {
144 self.bitmap.remove(id as u32);
145 }
146
147 pub fn len(&self) -> u64 {
148 self.bitmap.len()
149 }
150
151 pub fn is_empty(&self) -> bool {
152 self.bitmap.is_empty()
153 }
154
155 pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
156 self.bitmap.iter().map(|id| id as u64)
157 }
158
159 pub fn intersect(&self, other: &Self) -> Self {
161 Self {
162 bitmap: &self.bitmap & &other.bitmap,
163 }
164 }
165
166 pub fn union(&self, other: &Self) -> Self {
168 Self {
169 bitmap: &self.bitmap | &other.bitmap,
170 }
171 }
172
173 pub fn serialize(&self) -> Vec<u8> {
175 let mut bytes = Vec::new();
176 self.bitmap
177 .serialize_into(&mut bytes)
178 .expect("Failed to serialize bitmap");
179 bytes
180 }
181
182 pub fn deserialize(bytes: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
184 let bitmap = RoaringBitmap::deserialize_from(bytes)?;
185 Ok(Self { bitmap })
186 }
187}
188
189impl Default for RoaringBitmapIndex {
190 fn default() -> Self {
191 Self::new()
192 }
193}
194
195pub struct DeltaEncodedList {
198 base: u64,
200 deltas: Vec<u32>,
202}
203
204impl DeltaEncodedList {
205 pub fn new() -> Self {
206 Self {
207 base: 0,
208 deltas: Vec::new(),
209 }
210 }
211
212 pub fn encode(ids: &[u64]) -> Self {
214 if ids.is_empty() {
215 return Self::new();
216 }
217
218 let base = ids[0];
219 let deltas = ids
220 .windows(2)
221 .map(|pair| (pair[1] - pair[0]) as u32)
222 .collect();
223
224 Self { base, deltas }
225 }
226
227 pub fn decode(&self) -> Vec<u64> {
229 if self.deltas.is_empty() {
230 if self.base == 0 {
231 return Vec::new();
232 }
233 return vec![self.base];
234 }
235
236 let mut result = Vec::with_capacity(self.deltas.len() + 1);
237 result.push(self.base);
238
239 let mut current = self.base;
240 for &delta in &self.deltas {
241 current += delta as u64;
242 result.push(current);
243 }
244
245 result
246 }
247
248 pub fn compression_ratio(&self) -> f64 {
250 let original_size = (self.deltas.len() + 1) * 8; let compressed_size = 8 + self.deltas.len() * 4; original_size as f64 / compressed_size as f64
253 }
254}
255
256impl Default for DeltaEncodedList {
257 fn default() -> Self {
258 Self::new()
259 }
260}
261
262pub struct DeltaEncoder;
264
265impl DeltaEncoder {
266 pub fn encode(values: &[u64]) -> Vec<u8> {
268 if values.is_empty() {
269 return Vec::new();
270 }
271
272 let mut result = Vec::new();
273
274 result.extend_from_slice(&values[0].to_le_bytes());
276
277 for window in values.windows(2) {
279 let delta = (window[1] - window[0]) as u32;
280 result.extend_from_slice(&delta.to_le_bytes());
281 }
282
283 result
284 }
285
286 pub fn decode(bytes: &[u8]) -> Vec<u64> {
288 if bytes.len() < 8 {
289 return Vec::new();
290 }
291
292 let mut result = Vec::new();
293
294 let base = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
296 result.push(base);
297
298 let mut current = base;
300 for chunk in bytes[8..].chunks(4) {
301 if chunk.len() == 4 {
302 let delta = u32::from_le_bytes(chunk.try_into().unwrap());
303 current += delta as u64;
304 result.push(current);
305 }
306 }
307
308 result
309 }
310}
311
312struct StringDictionary {
314 string_to_id: HashMap<String, u32>,
316 id_to_string: HashMap<u32, String>,
318 next_id: u32,
320}
321
322impl StringDictionary {
323 fn new() -> Self {
324 Self {
325 string_to_id: HashMap::new(),
326 id_to_string: HashMap::new(),
327 next_id: 0,
328 }
329 }
330
331 fn encode(&mut self, s: &str) -> u32 {
332 if let Some(&id) = self.string_to_id.get(s) {
333 return id;
334 }
335
336 let id = self.next_id;
337 self.next_id += 1;
338
339 self.string_to_id.insert(s.to_string(), id);
340 self.id_to_string.insert(id, s.to_string());
341
342 id
343 }
344
345 fn decode(&self, id: u32) -> Option<String> {
346 self.id_to_string.get(&id).cloned()
347 }
348
349 fn len(&self) -> usize {
350 self.string_to_id.len()
351 }
352}
353
354#[cfg(test)]
355mod tests {
356 use super::*;
357
358 #[test]
359 fn test_compressed_index() {
360 let index = CompressedIndex::new();
361
362 index.add_to_label_index("Person", 1);
363 index.add_to_label_index("Person", 2);
364 index.add_to_label_index("Person", 3);
365 index.add_to_label_index("Employee", 2);
366 index.add_to_label_index("Employee", 3);
367
368 let persons = index.get_nodes_by_label("Person");
369 assert_eq!(persons.len(), 3);
370
371 let intersection = index.intersect_labels(&["Person", "Employee"]);
372 assert_eq!(intersection.len(), 2);
373
374 let union = index.union_labels(&["Person", "Employee"]);
375 assert_eq!(union.len(), 3);
376 }
377
378 #[test]
379 fn test_roaring_bitmap() {
380 let mut bitmap = RoaringBitmapIndex::new();
381
382 bitmap.insert(1);
383 bitmap.insert(100);
384 bitmap.insert(1000);
385
386 assert!(bitmap.contains(1));
387 assert!(bitmap.contains(100));
388 assert!(!bitmap.contains(50));
389
390 assert_eq!(bitmap.len(), 3);
391 }
392
393 #[test]
394 fn test_delta_encoding() {
395 let ids = vec![100, 102, 105, 110, 120];
396 let encoded = DeltaEncodedList::encode(&ids);
397 let decoded = encoded.decode();
398
399 assert_eq!(ids, decoded);
400 assert!(encoded.compression_ratio() > 1.0);
401 }
402
403 #[test]
404 fn test_delta_encoder() {
405 let values = vec![1000, 1005, 1010, 1020, 1030];
406 let encoded = DeltaEncoder::encode(&values);
407 let decoded = DeltaEncoder::decode(&encoded);
408
409 assert_eq!(values, decoded);
410
411 assert!(encoded.len() < values.len() * 8);
413 }
414
415 #[test]
416 fn test_string_dictionary() {
417 let index = CompressedIndex::new();
418
419 let id1 = index.encode_string("hello");
420 let id2 = index.encode_string("world");
421 let id3 = index.encode_string("hello"); assert_eq!(id1, id3); assert_ne!(id1, id2);
425
426 assert_eq!(index.decode_string(id1), Some("hello".to_string()));
427 assert_eq!(index.decode_string(id2), Some("world".to_string()));
428 }
429}