velesdb-core 1.13.5

High-performance vector database engine written in Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
//! Index management methods for Collection (EPIC-009 propagation).

use crate::collection::types::Collection;
use crate::error::Result;
use crate::index::{JsonValue, SecondaryIndex};
use parking_lot::RwLock;
use std::collections::BTreeMap;

/// Index information response for API.
#[derive(Debug, Clone)]
pub struct IndexInfo {
    /// Node label.
    pub label: String,
    /// Property name.
    pub property: String,
    /// Index type (hash or range).
    pub index_type: String,
    /// Number of unique values indexed.
    pub cardinality: usize,
    /// Memory usage in bytes.
    pub memory_bytes: usize,
}

impl Collection {
    /// Creates a secondary metadata index for a payload field.
    ///
    /// When the index already exists, triggers a backfill to ensure all
    /// existing payloads are indexed (handles the case where bulk insert
    /// skipped per-point index updates).
    ///
    /// # Errors
    ///
    /// Returns Ok(()) on success. Index creation is idempotent.
    #[allow(clippy::unnecessary_wraps)] // Reason: Public API contract — callers expect Result
    pub fn create_index(&self, field_name: &str) -> Result<()> {
        let mut indexes = self.secondary_indexes.write();
        let is_new = !indexes.contains_key(field_name);
        indexes
            .entry(field_name.to_string())
            .or_insert_with(|| SecondaryIndex::BTree(RwLock::new(BTreeMap::new())));

        // Backfill: scan existing payloads to populate the index.
        // Runs for both new indexes AND existing indexes (to catch points
        // inserted via bulk paths that skipped per-point index updates).
        drop(indexes); // Release write lock before reading payloads
        self.backfill_secondary_index(field_name, is_new);

        Ok(())
    }

    /// Scans existing payloads and populates the secondary index for `field_name`.
    ///
    /// Runs for both new and existing indexes to catch points inserted via
    /// bulk paths that skipped per-point index updates.
    fn backfill_secondary_index(&self, field_name: &str, is_new: bool) {
        use crate::storage::PayloadStorage;

        let payload_storage = self.payload_storage.read();
        let ids = PayloadStorage::ids(&*payload_storage);
        let indexes = self.secondary_indexes.read();
        let Some(index) = indexes.get(field_name) else {
            return;
        };
        let SecondaryIndex::BTree(ref tree) = index;
        let mut tree_guard = tree.write();
        for id in ids {
            Self::backfill_single_payload(&*payload_storage, id, field_name, &mut tree_guard);
        }
        if !is_new {
            tracing::debug!(
                field = field_name,
                "create_index: backfilled existing index (bulk insert recovery)"
            );
        }
    }

    /// Indexes a single payload entry for the given field, if present.
    fn backfill_single_payload(
        payload_storage: &dyn crate::storage::PayloadStorage,
        id: u64,
        field_name: &str,
        tree_guard: &mut std::collections::BTreeMap<JsonValue, Vec<u64>>,
    ) {
        if let Ok(Some(payload)) = payload_storage.retrieve(id) {
            if let Some(val) = payload.get(field_name) {
                if let Some(key) = JsonValue::from_json(val) {
                    let ids_vec = tree_guard.entry(key).or_default();
                    if !ids_vec.contains(&id) {
                        ids_vec.push(id);
                    }
                }
            }
        }
    }

    /// Drops a secondary metadata index for a payload field.
    ///
    /// Returns `true` if the index existed and was removed, `false` if no
    /// such index existed.
    #[must_use]
    pub fn drop_secondary_index(&self, field_name: &str) -> bool {
        self.secondary_indexes.write().remove(field_name).is_some()
    }

    /// Checks whether a secondary metadata index exists for a field.
    #[must_use]
    pub fn has_secondary_index(&self, field_name: &str) -> bool {
        self.secondary_indexes.read().contains_key(field_name)
    }

    /// Returns the set of payload field names covered by a secondary index
    /// (issue #607).
    ///
    /// Threaded into `QueryPlan::from_query_with_stats` via
    /// `Database::build_plan_with_stats` so `IndexLookup` plan nodes are
    /// generated when an `EXPLAIN` target references an indexed column.
    /// Returns an empty set for collections with no indexes registered.
    #[must_use]
    pub fn indexed_field_names(&self) -> std::collections::HashSet<String> {
        self.secondary_indexes.read().keys().cloned().collect()
    }

    /// Looks up matching point IDs for an indexed field value.
    #[must_use]
    pub fn secondary_index_lookup(&self, field_name: &str, value: &JsonValue) -> Option<Vec<u64>> {
        let indexes = self.secondary_indexes.read();
        let index = indexes.get(field_name)?;
        match index {
            SecondaryIndex::BTree(tree) => tree.read().get(value).cloned(),
        }
    }

    /// Builds a pre-filter bitmap from a [`Filter`] using secondary indexes.
    ///
    /// Supports `Eq`, `Neq` (universe subtraction), `Gt`/`Gte`/`Lt`/`Lte`
    /// (range scan), `And` (intersection), and `Or` (union, only when all
    /// children resolve). Returns `None` when the condition cannot be resolved
    /// via indexes (e.g., `Not`, non-indexed fields), signalling the caller to
    /// fall back to post-filter.
    #[must_use]
    pub(crate) fn build_prefilter_bitmap(
        &self,
        filter: &crate::filter::Filter,
    ) -> Option<roaring::RoaringBitmap> {
        Self::bitmap_from_condition(&self.secondary_indexes, &filter.condition)
    }

    /// Recursively extracts bitmaps from conditions backed by secondary indexes.
    ///
    /// Supported conditions:
    /// - `Eq`: exact-match lookup
    /// - `Neq`: universe bitmap minus exact-match (all indexed IDs except matches)
    /// - `Gt`, `Gte`, `Lt`, `Lte`: range scan via `BTreeMap::range()`
    /// - `In`: union of per-value B-tree lookups
    /// - `Not { In }`: universe bitmap minus IN bitmap (set complement)
    /// - `And`: intersection of child bitmaps
    /// - `Or`: union of child bitmaps (all children must resolve)
    ///
    /// Returns `None` for `Not` wrapping non-`In` conditions and unsupported conditions.
    fn bitmap_from_condition(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        cond: &crate::filter::Condition,
    ) -> Option<roaring::RoaringBitmap> {
        match cond {
            crate::filter::Condition::Eq { field, value } => {
                Self::bitmap_for_eq_field(indexes, field, value)
            }
            crate::filter::Condition::Neq { field, value } => {
                // NEQ = universe - eq_matches
                // Build the universe from all keys in the index for this field,
                // then subtract the matching IDs. If the value doesn't exist in
                // the index, eq_bitmap defaults to empty — every indexed ID matches.
                let universe = Self::bitmap_universe_for_field(indexes, field)?;
                let eq_bitmap =
                    Self::bitmap_for_eq_field(indexes, field, value).unwrap_or_default();
                Some(universe - eq_bitmap)
            }
            crate::filter::Condition::Gt { field, value }
            | crate::filter::Condition::Gte { field, value }
            | crate::filter::Condition::Lt { field, value }
            | crate::filter::Condition::Lte { field, value } => {
                Self::bitmap_for_range_field(indexes, field, value, cond)
            }
            crate::filter::Condition::In { field, values } => {
                Self::bitmap_for_in_field(indexes, field, values)
            }
            crate::filter::Condition::Not { condition } => {
                Self::bitmap_for_not_in(indexes, condition)
            }
            crate::filter::Condition::And { conditions } => {
                Self::bitmap_from_and(indexes, conditions)
            }
            crate::filter::Condition::Or { conditions } => {
                Self::bitmap_from_or(indexes, conditions)
            }
            _ => None,
        }
    }

    /// Builds a universe bitmap containing ALL IDs indexed for a given field.
    fn bitmap_universe_for_field(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        field: &str,
    ) -> Option<roaring::RoaringBitmap> {
        let guard = indexes.read();
        let index = guard.get(field)?;
        Some(index.all_ids_bitmap())
    }

    /// Looks up a single equality field in the secondary indexes.
    fn bitmap_for_eq_field(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        field: &str,
        value: &serde_json::Value,
    ) -> Option<roaring::RoaringBitmap> {
        let key = JsonValue::from_json(value)?;
        let guard = indexes.read();
        let index = guard.get(field)?;
        let bm = index.to_bitmap(&key);
        if bm.is_empty() {
            return Some(bm); // Empty bitmap = no matches (valid pre-filter)
        }
        Some(bm)
    }

    /// Builds a bitmap for `IN(field, values)` by unioning per-value B-tree lookups.
    ///
    /// Acquires the secondary index read-lock once and iterates all values under
    /// the same guard. Values that don't convert to [`JsonValue`] or don't exist
    /// in the index are silently skipped (contribute empty bitmap).
    ///
    /// Time complexity: O(N × log K) where N = `values.len()`, K = index keys.
    /// Space: O(|result|) — single accumulator bitmap, no intermediate allocations.
    fn bitmap_for_in_field(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        field: &str,
        values: &[serde_json::Value],
    ) -> Option<roaring::RoaringBitmap> {
        if values.is_empty() {
            return Some(roaring::RoaringBitmap::new());
        }
        let guard = indexes.read();
        let index = guard.get(field)?;
        let mut acc = roaring::RoaringBitmap::new();
        for v in values {
            if let Some(key) = JsonValue::from_json(v) {
                acc |= index.to_bitmap(&key);
            }
        }
        Some(acc)
    }

    /// Handles `Condition::Not` by pattern-matching the inner condition.
    ///
    /// Currently only `Not { In { field, values } }` is supported — computes
    /// `universe - in_bitmap`. All other `Not` variants return `None`.
    fn bitmap_for_not_in(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        inner: &crate::filter::Condition,
    ) -> Option<roaring::RoaringBitmap> {
        if let crate::filter::Condition::In { field, values } = inner {
            let universe = Self::bitmap_universe_for_field(indexes, field)?;
            let in_bm = Self::bitmap_for_in_field(indexes, field, values).unwrap_or_default();
            Some(universe - in_bm)
        } else {
            None
        }
    }

    /// Builds a range bitmap for Gt/Gte/Lt/Lte using `SecondaryIndex::range_bitmap`.
    fn bitmap_for_range_field(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        field: &str,
        value: &serde_json::Value,
        cond: &crate::filter::Condition,
    ) -> Option<roaring::RoaringBitmap> {
        use std::ops::Bound;

        let key = JsonValue::from_json(value)?;
        let guard = indexes.read();
        let index = guard.get(field)?;
        let (from, to) = match cond {
            crate::filter::Condition::Gt { .. } => (Bound::Excluded(&key), Bound::Unbounded),
            crate::filter::Condition::Gte { .. } => (Bound::Included(&key), Bound::Unbounded),
            crate::filter::Condition::Lt { .. } => (Bound::Unbounded, Bound::Excluded(&key)),
            crate::filter::Condition::Lte { .. } => (Bound::Unbounded, Bound::Included(&key)),
            _ => return None,
        };
        Some(index.range_bitmap(from, to))
    }

    /// Intersects bitmaps from AND-ed conditions.
    fn bitmap_from_and(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        conditions: &[crate::filter::Condition],
    ) -> Option<roaring::RoaringBitmap> {
        let mut result: Option<roaring::RoaringBitmap> = None;
        for cond in conditions {
            if let Some(bm) = Self::bitmap_from_condition(indexes, cond) {
                result = Some(match result {
                    Some(existing) => existing & &bm,
                    None => bm,
                });
            }
        }
        result
    }

    /// Unions bitmaps from OR-ed conditions.
    ///
    /// If ANY child returns `None` (cannot be pre-filtered), the entire OR
    /// must return `None` because the union would be incomplete -- the
    /// post-filter must evaluate the full OR instead.
    fn bitmap_from_or(
        indexes: &std::sync::Arc<
            parking_lot::RwLock<std::collections::HashMap<String, SecondaryIndex>>,
        >,
        conditions: &[crate::filter::Condition],
    ) -> Option<roaring::RoaringBitmap> {
        let mut result = roaring::RoaringBitmap::new();
        for cond in conditions {
            let bm = Self::bitmap_from_condition(indexes, cond)?;
            result |= bm;
        }
        Some(result)
    }

    /// Create a property index for O(1) equality lookups.
    ///
    /// # Arguments
    ///
    /// * `label` - Node label to index (e.g., "Person")
    /// * `property` - Property name to index (e.g., "email")
    ///
    /// # Errors
    ///
    /// Returns Ok(()) on success. Index creation is idempotent.
    #[allow(clippy::unnecessary_wraps)] // Reason: Public API contract — callers expect Result
    pub fn create_property_index(&self, label: &str, property: &str) -> Result<()> {
        let mut index = self.property_index.write();
        index.create_index(label, property);
        Ok(())
    }

    /// Create a range index for O(log n) range queries.
    ///
    /// # Arguments
    ///
    /// * `label` - Node label to index (e.g., "Event")
    /// * `property` - Property name to index (e.g., "timestamp")
    ///
    /// # Errors
    ///
    /// Returns Ok(()) on success. Index creation is idempotent.
    #[allow(clippy::unnecessary_wraps)] // Reason: Public API contract — callers expect Result
    pub fn create_range_index(&self, label: &str, property: &str) -> Result<()> {
        let mut index = self.range_index.write();
        index.create_index(label, property);
        Ok(())
    }

    /// Check if a property index exists.
    #[must_use]
    pub fn has_property_index(&self, label: &str, property: &str) -> bool {
        self.property_index.read().has_index(label, property)
    }

    /// Check if a range index exists.
    #[must_use]
    pub fn has_range_index(&self, label: &str, property: &str) -> bool {
        self.range_index.read().has_index(label, property)
    }

    /// List all indexes on this collection.
    #[must_use]
    pub fn list_indexes(&self) -> Vec<IndexInfo> {
        let mut indexes = Vec::new();

        // Secondary indexes (metadata field indexes created via create_index)
        let sec_indexes = self.secondary_indexes.read();
        for (field, index) in sec_indexes.iter() {
            let cardinality = match index {
                crate::index::SecondaryIndex::BTree(tree) => tree.read().len(),
            };
            indexes.push(IndexInfo {
                label: "secondary".to_string(),
                property: field.clone(),
                index_type: "hash".to_string(),
                cardinality,
                memory_bytes: 0,
            });
        }
        drop(sec_indexes);

        // LOCK ORDER: property_index(7) read — then range_index(7) read.
        // Same level, reads-only; canonical order prevents deadlock.
        let prop_index = self.property_index.read();
        for (label, property) in prop_index.indexed_properties() {
            let cardinality = prop_index.cardinality(&label, &property).unwrap_or(0);
            indexes.push(IndexInfo {
                label,
                property,
                index_type: "hash".to_string(),
                cardinality,
                memory_bytes: 0,
            });
        }

        // List range indexes
        let range_idx = self.range_index.read();
        for (label, property) in range_idx.indexed_properties() {
            indexes.push(IndexInfo {
                label,
                property,
                index_type: "range".to_string(),
                cardinality: 0,
                memory_bytes: 0,
            });
        }

        indexes
    }

    /// Drop an index (either property or range).
    ///
    /// # Arguments
    ///
    /// * `label` - Node label
    /// * `property` - Property name
    ///
    /// # Returns
    ///
    /// Ok(true) if an index was dropped, Ok(false) if no index existed.
    ///
    /// # Errors
    ///
    /// Returns an error if underlying index stores fail while dropping.
    #[allow(clippy::unnecessary_wraps)] // Reason: Public API contract — callers expect Result
    pub fn drop_index(&self, label: &str, property: &str) -> Result<bool> {
        // Try property index first
        let dropped_prop = self.property_index.write().drop_index(label, property);
        if dropped_prop {
            return Ok(true);
        }

        // Try range index
        let dropped_range = self.range_index.write().drop_index(label, property);
        Ok(dropped_range)
    }

    /// Get total memory usage of all indexes.
    #[must_use]
    pub fn indexes_memory_usage(&self) -> usize {
        // LOCK ORDER: property_index(7) read — then range_index(7) read.
        // Same level, reads-only; canonical order prevents deadlock.
        let prop_mem = self.property_index.read().memory_usage();
        let range_mem = self.range_index.read().memory_usage();
        prop_mem + range_mem
    }
}