Skip to main content

veclite_core/
lib.rs

1use rayon::prelude::*;
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use thiserror::Error;
5
6pub use veclite_index::{
7    hnsw::{HnswConfig, HnswIndex},
8    CosineMetric, DotMetric, EuclideanMetric, ManhattanMetric, Metric, SimilarityMetric,
9};
10pub use veclite_storage::{Record, Storage, StorageError};
11
12#[derive(Error, Debug)]
13pub enum VecLiteError {
14    #[error("Storage error: {0}")]
15    Storage(#[from] StorageError),
16    #[error("Serialization error: {0}")]
17    Serde(#[from] serde_json::Error),
18}
19
20pub type Result<T> = std::result::Result<T, VecLiteError>;
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct SearchResult {
24    pub id: String,
25    pub score: f32,
26    pub metadata: Option<serde_json::Value>,
27}
28
29pub struct SearchBuilder<'a> {
30    db: &'a VecLite,
31    query: Vec<f32>,
32    k: usize,
33    filters: Vec<(&'a str, serde_json::Value)>,
34    decay_factor: Option<f32>,
35    current_time: Option<u64>,
36}
37
38impl<'a> SearchBuilder<'a> {
39    pub fn new(db: &'a VecLite, query: Vec<f32>) -> Self {
40        Self {
41            db,
42            query,
43            k: 5,
44            filters: Vec::new(),
45            decay_factor: None,
46            current_time: None,
47        }
48    }
49
50    pub fn top_k(mut self, k: usize) -> Self {
51        self.k = k;
52        self
53    }
54
55    pub fn filter<V: Serialize>(mut self, key: &'a str, value: V) -> Self {
56        if let Ok(v) = serde_json::to_value(value) {
57            self.filters.push((key, v));
58        }
59        self
60    }
61
62    pub fn time_decay(mut self, factor: f32, current_time: u64) -> Self {
63        self.decay_factor = Some(factor);
64        self.current_time = Some(current_time);
65        self
66    }
67
68    pub fn execute(self) -> Result<Vec<SearchResult>> {
69        // If we have an HNSW index and NO filters and NO time decay, we can use it!
70        // (HNSW doesn't inherently support post-filtering easily without more work,
71        //  but for simple searches it's perfect.)
72        let use_hnsw =
73            self.filters.is_empty() && self.decay_factor.is_none() && self.db.index.is_some();
74
75        if use_hnsw {
76            let hnsw = self.db.index.as_ref().unwrap();
77
78            let get_vector = |idx: usize| self.db.storage.records[idx].vector.as_slice();
79
80            let distance_fn = |a: &[f32], b: &[f32]| match self.db.metric {
81                Metric::Cosine => CosineMetric::distance(a, b),
82                Metric::DotProduct => DotMetric::distance(a, b),
83                Metric::Euclidean => EuclideanMetric::distance(a, b),
84                Metric::Manhattan => ManhattanMetric::distance(a, b),
85            };
86
87            let results_indices = hnsw.search(
88                &self.query,
89                self.k,
90                hnsw.config.ef_search,
91                &get_vector,
92                &distance_fn,
93            );
94
95            let mut results = Vec::with_capacity(results_indices.len());
96            for (idx, score) in results_indices {
97                let r = &self.db.storage.records[idx];
98                results.push(SearchResult {
99                    id: r.id.clone(),
100                    score,
101                    metadata: r.metadata.clone(),
102                });
103            }
104
105            return Ok(results);
106        }
107
108        let mut results: Vec<SearchResult> = self
109            .db
110            .storage
111            .records
112            .par_iter()
113            .filter(|r| {
114                if self.filters.is_empty() {
115                    return true;
116                }
117                if let Some(ref meta) = r.metadata {
118                    for (k, v) in &self.filters {
119                        if meta.get(*k) != Some(v) {
120                            return false;
121                        }
122                    }
123                    true
124                } else {
125                    false
126                }
127            })
128            .map(|r| {
129                let mut score = match self.db.metric {
130                    Metric::Cosine => CosineMetric::distance(&self.query, &r.vector),
131                    Metric::DotProduct => DotMetric::distance(&self.query, &r.vector),
132                    Metric::Euclidean => EuclideanMetric::distance(&self.query, &r.vector),
133                    Metric::Manhattan => ManhattanMetric::distance(&self.query, &r.vector),
134                };
135
136                if let (Some(factor), Some(current), Some(ts)) =
137                    (self.decay_factor, self.current_time, r.timestamp)
138                {
139                    if current > ts {
140                        let age = (current - ts) as f32;
141                        score *= factor.powf(age / 86400.0);
142                    }
143                }
144
145                SearchResult {
146                    id: r.id.clone(),
147                    score,
148                    metadata: r.metadata.clone(),
149                }
150            })
151            .collect();
152
153        let higher_better = match self.db.metric {
154            Metric::Cosine | Metric::DotProduct => true,
155            Metric::Euclidean | Metric::Manhattan => false,
156        };
157
158        if higher_better {
159            results.sort_by(|a, b| {
160                b.score
161                    .partial_cmp(&a.score)
162                    .unwrap_or(std::cmp::Ordering::Equal)
163            });
164        } else {
165            results.sort_by(|a, b| {
166                a.score
167                    .partial_cmp(&b.score)
168                    .unwrap_or(std::cmp::Ordering::Equal)
169            });
170        }
171
172        results.truncate(self.k);
173        Ok(results)
174    }
175}
176
177pub struct VecLite {
178    pub storage: Storage,
179    pub metric: Metric,
180    pub index: Option<HnswIndex>,
181}
182
183impl VecLite {
184    /// Open database
185    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
186        Self::open_with_metric(path, Metric::Cosine)
187    }
188
189    /// Open database with specific metric
190    pub fn open_with_metric<P: AsRef<Path>>(path: P, metric: Metric) -> Result<Self> {
191        let storage = Storage::open(path)?;
192        let db = Self {
193            storage,
194            metric,
195            index: None,
196        };
197        // By default, let's say we don't enable HNSW unless they call a build method,
198        // but wait! The prompt said "When inserting a vector, add it to the HnswIndex if HNSW is enabled."
199        // Let's create an enable_hnsw() function.
200        Ok(db)
201    }
202
203    pub fn enable_hnsw(&mut self, config: HnswConfig) {
204        let mut index = HnswIndex::new(config);
205
206        let get_vector = |idx: usize| self.storage.records[idx].vector.as_slice();
207        let distance_fn = |a: &[f32], b: &[f32]| match self.metric {
208            Metric::Cosine => CosineMetric::distance(a, b),
209            Metric::DotProduct => DotMetric::distance(a, b),
210            Metric::Euclidean => EuclideanMetric::distance(a, b),
211            Metric::Manhattan => ManhattanMetric::distance(a, b),
212        };
213
214        // Rebuild index from existing records
215        for i in 0..self.storage.records.len() {
216            index.insert(
217                i,
218                &self.storage.records[i].vector,
219                &get_vector,
220                &distance_fn,
221            );
222        }
223
224        self.index = Some(index);
225    }
226
227    /// Insert single vector
228    pub fn insert(
229        &mut self,
230        id: &str,
231        vector: Vec<f32>,
232        metadata: Option<serde_json::Value>,
233    ) -> Result<()> {
234        self.insert_with_time(id, vector, metadata, None)
235    }
236
237    /// Insert vector with time
238    pub fn insert_with_time(
239        &mut self,
240        id: &str,
241        vector: Vec<f32>,
242        metadata: Option<serde_json::Value>,
243        timestamp: Option<u64>,
244    ) -> Result<()> {
245        let record = Record {
246            id: id.to_string(),
247            vector: vector.clone(),
248            metadata,
249            timestamp,
250        };
251
252        let idx = self.storage.records.len();
253        self.storage.append(record)?;
254
255        if let Some(index) = &mut self.index {
256            let get_vector = |i: usize| self.storage.records[i].vector.as_slice();
257            let distance_fn = |a: &[f32], b: &[f32]| match self.metric {
258                Metric::Cosine => CosineMetric::distance(a, b),
259                Metric::DotProduct => DotMetric::distance(a, b),
260                Metric::Euclidean => EuclideanMetric::distance(a, b),
261                Metric::Manhattan => ManhattanMetric::distance(a, b),
262            };
263            index.insert(idx, &vector, &get_vector, &distance_fn);
264        }
265
266        Ok(())
267    }
268
269    /// Insert multiple vectors
270    pub fn insert_batch(
271        &mut self,
272        records: Vec<(&str, Vec<f32>, Option<serde_json::Value>)>,
273    ) -> Result<()> {
274        let start_idx = self.storage.records.len();
275        let mut recs = Vec::new();
276        for (id, vector, metadata) in records {
277            recs.push(Record {
278                id: id.to_string(),
279                vector,
280                metadata,
281                timestamp: None,
282            });
283        }
284        self.storage.append_batch(recs.clone())?;
285
286        if let Some(index) = &mut self.index {
287            let get_vector = |i: usize| self.storage.records[i].vector.as_slice();
288            let distance_fn = |a: &[f32], b: &[f32]| match self.metric {
289                Metric::Cosine => CosineMetric::distance(a, b),
290                Metric::DotProduct => DotMetric::distance(a, b),
291                Metric::Euclidean => EuclideanMetric::distance(a, b),
292                Metric::Manhattan => ManhattanMetric::distance(a, b),
293            };
294
295            for (offset, record) in recs.iter().enumerate() {
296                index.insert(
297                    start_idx + offset,
298                    &record.vector,
299                    &get_vector,
300                    &distance_fn,
301                );
302            }
303        }
304
305        Ok(())
306    }
307
308    /// Simple search
309    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
310        SearchBuilder::new(self, query.to_vec()).top_k(k).execute()
311    }
312
313    /// Search with filters and decay
314    pub fn build_search(&self, query: &[f32]) -> SearchBuilder<'_> {
315        SearchBuilder::new(self, query.to_vec())
316    }
317
318    /// Database statistics
319    pub fn stats(&self) -> Result<(usize, usize)> {
320        let stats = self.storage.stats()?;
321        Ok(stats)
322    }
323
324    /// Retrieve a record by its ID
325    pub fn get(&self, id: &str) -> Option<SearchResult> {
326        self.storage
327            .records
328            .iter()
329            .rev()
330            .find(|r| r.id == id)
331            .map(|r| SearchResult {
332                id: r.id.clone(),
333                score: 0.0,
334                metadata: r.metadata.clone(),
335            })
336    }
337}
338
339impl VecLite {
340    /// Delete a vector by removing it from storage
341    pub fn delete(&mut self, id: &str) -> Result<()> {
342        let removed = self.storage.delete(id)?;
343
344        if removed {
345            // Rebuild the in-memory index if it exists since we shifted indices
346            if let Some(ref config) = self.index.as_ref().map(|idx| idx.config.clone()) {
347                self.enable_hnsw(config.clone());
348            }
349        }
350
351        Ok(())
352    }
353
354    /// Update a vector
355    pub fn update(
356        &mut self,
357        id: &str,
358        vector: Vec<f32>,
359        metadata: Option<serde_json::Value>,
360    ) -> Result<()> {
361        self.delete(id)?;
362        self.insert(id, vector, metadata)?;
363        Ok(())
364    }
365
366    /// Compact the database
367    pub fn compact(&mut self) -> Result<()> {
368        // redb automatically handles compaction/free space reuse
369        Ok(())
370    }
371}