1use rayon::prelude::*;
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use thiserror::Error;
5
6pub use veclite_index::{
7 hnsw::{HnswConfig, HnswIndex},
8 CosineMetric, DotMetric, EuclideanMetric, ManhattanMetric, Metric, SimilarityMetric,
9};
10pub use veclite_storage::{Record, Storage, StorageError};
11
12#[derive(Error, Debug)]
13pub enum VecLiteError {
14 #[error("Storage error: {0}")]
15 Storage(#[from] StorageError),
16 #[error("Serialization error: {0}")]
17 Serde(#[from] serde_json::Error),
18}
19
20pub type Result<T> = std::result::Result<T, VecLiteError>;
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct SearchResult {
24 pub id: String,
25 pub score: f32,
26 pub metadata: Option<serde_json::Value>,
27}
28
29pub struct SearchBuilder<'a> {
30 db: &'a VecLite,
31 query: Vec<f32>,
32 k: usize,
33 filters: Vec<(&'a str, serde_json::Value)>,
34 decay_factor: Option<f32>,
35 current_time: Option<u64>,
36}
37
38impl<'a> SearchBuilder<'a> {
39 pub fn new(db: &'a VecLite, query: Vec<f32>) -> Self {
40 Self {
41 db,
42 query,
43 k: 5,
44 filters: Vec::new(),
45 decay_factor: None,
46 current_time: None,
47 }
48 }
49
50 pub fn top_k(mut self, k: usize) -> Self {
51 self.k = k;
52 self
53 }
54
55 pub fn filter<V: Serialize>(mut self, key: &'a str, value: V) -> Self {
56 if let Ok(v) = serde_json::to_value(value) {
57 self.filters.push((key, v));
58 }
59 self
60 }
61
62 pub fn time_decay(mut self, factor: f32, current_time: u64) -> Self {
63 self.decay_factor = Some(factor);
64 self.current_time = Some(current_time);
65 self
66 }
67
68 pub fn execute(self) -> Result<Vec<SearchResult>> {
69 let use_hnsw =
73 self.filters.is_empty() && self.decay_factor.is_none() && self.db.index.is_some();
74
75 if use_hnsw {
76 let hnsw = self.db.index.as_ref().unwrap();
77
78 let get_vector = |idx: usize| self.db.storage.records[idx].vector.as_slice();
79
80 let distance_fn = |a: &[f32], b: &[f32]| match self.db.metric {
81 Metric::Cosine => CosineMetric::distance(a, b),
82 Metric::DotProduct => DotMetric::distance(a, b),
83 Metric::Euclidean => EuclideanMetric::distance(a, b),
84 Metric::Manhattan => ManhattanMetric::distance(a, b),
85 };
86
87 let results_indices = hnsw.search(
88 &self.query,
89 self.k,
90 hnsw.config.ef_search,
91 &get_vector,
92 &distance_fn,
93 );
94
95 let mut results = Vec::with_capacity(results_indices.len());
96 for (idx, score) in results_indices {
97 let r = &self.db.storage.records[idx];
98 results.push(SearchResult {
99 id: r.id.clone(),
100 score,
101 metadata: r.metadata.clone(),
102 });
103 }
104
105 return Ok(results);
106 }
107
108 let mut results: Vec<SearchResult> = self
109 .db
110 .storage
111 .records
112 .par_iter()
113 .filter(|r| {
114 if self.filters.is_empty() {
115 return true;
116 }
117 if let Some(ref meta) = r.metadata {
118 for (k, v) in &self.filters {
119 if meta.get(*k) != Some(v) {
120 return false;
121 }
122 }
123 true
124 } else {
125 false
126 }
127 })
128 .map(|r| {
129 let mut score = match self.db.metric {
130 Metric::Cosine => CosineMetric::distance(&self.query, &r.vector),
131 Metric::DotProduct => DotMetric::distance(&self.query, &r.vector),
132 Metric::Euclidean => EuclideanMetric::distance(&self.query, &r.vector),
133 Metric::Manhattan => ManhattanMetric::distance(&self.query, &r.vector),
134 };
135
136 if let (Some(factor), Some(current), Some(ts)) =
137 (self.decay_factor, self.current_time, r.timestamp)
138 {
139 if current > ts {
140 let age = (current - ts) as f32;
141 score *= factor.powf(age / 86400.0);
142 }
143 }
144
145 SearchResult {
146 id: r.id.clone(),
147 score,
148 metadata: r.metadata.clone(),
149 }
150 })
151 .collect();
152
153 let higher_better = match self.db.metric {
154 Metric::Cosine | Metric::DotProduct => true,
155 Metric::Euclidean | Metric::Manhattan => false,
156 };
157
158 if higher_better {
159 results.sort_by(|a, b| {
160 b.score
161 .partial_cmp(&a.score)
162 .unwrap_or(std::cmp::Ordering::Equal)
163 });
164 } else {
165 results.sort_by(|a, b| {
166 a.score
167 .partial_cmp(&b.score)
168 .unwrap_or(std::cmp::Ordering::Equal)
169 });
170 }
171
172 results.truncate(self.k);
173 Ok(results)
174 }
175}
176
177pub struct VecLite {
178 pub storage: Storage,
179 pub metric: Metric,
180 pub index: Option<HnswIndex>,
181}
182
183impl VecLite {
184 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
186 Self::open_with_metric(path, Metric::Cosine)
187 }
188
189 pub fn open_with_metric<P: AsRef<Path>>(path: P, metric: Metric) -> Result<Self> {
191 let storage = Storage::open(path)?;
192 let db = Self {
193 storage,
194 metric,
195 index: None,
196 };
197 Ok(db)
201 }
202
203 pub fn enable_hnsw(&mut self, config: HnswConfig) {
204 let mut index = HnswIndex::new(config);
205
206 let get_vector = |idx: usize| self.storage.records[idx].vector.as_slice();
207 let distance_fn = |a: &[f32], b: &[f32]| match self.metric {
208 Metric::Cosine => CosineMetric::distance(a, b),
209 Metric::DotProduct => DotMetric::distance(a, b),
210 Metric::Euclidean => EuclideanMetric::distance(a, b),
211 Metric::Manhattan => ManhattanMetric::distance(a, b),
212 };
213
214 for i in 0..self.storage.records.len() {
216 index.insert(
217 i,
218 &self.storage.records[i].vector,
219 &get_vector,
220 &distance_fn,
221 );
222 }
223
224 self.index = Some(index);
225 }
226
227 pub fn insert(
229 &mut self,
230 id: &str,
231 vector: Vec<f32>,
232 metadata: Option<serde_json::Value>,
233 ) -> Result<()> {
234 self.insert_with_time(id, vector, metadata, None)
235 }
236
237 pub fn insert_with_time(
239 &mut self,
240 id: &str,
241 vector: Vec<f32>,
242 metadata: Option<serde_json::Value>,
243 timestamp: Option<u64>,
244 ) -> Result<()> {
245 let record = Record {
246 id: id.to_string(),
247 vector: vector.clone(),
248 metadata,
249 timestamp,
250 };
251
252 let idx = self.storage.records.len();
253 self.storage.append(record)?;
254
255 if let Some(index) = &mut self.index {
256 let get_vector = |i: usize| self.storage.records[i].vector.as_slice();
257 let distance_fn = |a: &[f32], b: &[f32]| match self.metric {
258 Metric::Cosine => CosineMetric::distance(a, b),
259 Metric::DotProduct => DotMetric::distance(a, b),
260 Metric::Euclidean => EuclideanMetric::distance(a, b),
261 Metric::Manhattan => ManhattanMetric::distance(a, b),
262 };
263 index.insert(idx, &vector, &get_vector, &distance_fn);
264 }
265
266 Ok(())
267 }
268
269 pub fn insert_batch(
271 &mut self,
272 records: Vec<(&str, Vec<f32>, Option<serde_json::Value>)>,
273 ) -> Result<()> {
274 let start_idx = self.storage.records.len();
275 let mut recs = Vec::new();
276 for (id, vector, metadata) in records {
277 recs.push(Record {
278 id: id.to_string(),
279 vector,
280 metadata,
281 timestamp: None,
282 });
283 }
284 self.storage.append_batch(recs.clone())?;
285
286 if let Some(index) = &mut self.index {
287 let get_vector = |i: usize| self.storage.records[i].vector.as_slice();
288 let distance_fn = |a: &[f32], b: &[f32]| match self.metric {
289 Metric::Cosine => CosineMetric::distance(a, b),
290 Metric::DotProduct => DotMetric::distance(a, b),
291 Metric::Euclidean => EuclideanMetric::distance(a, b),
292 Metric::Manhattan => ManhattanMetric::distance(a, b),
293 };
294
295 for (offset, record) in recs.iter().enumerate() {
296 index.insert(
297 start_idx + offset,
298 &record.vector,
299 &get_vector,
300 &distance_fn,
301 );
302 }
303 }
304
305 Ok(())
306 }
307
308 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
310 SearchBuilder::new(self, query.to_vec()).top_k(k).execute()
311 }
312
313 pub fn build_search(&self, query: &[f32]) -> SearchBuilder<'_> {
315 SearchBuilder::new(self, query.to_vec())
316 }
317
318 pub fn stats(&self) -> Result<(usize, usize)> {
320 let stats = self.storage.stats()?;
321 Ok(stats)
322 }
323
324 pub fn get(&self, id: &str) -> Option<SearchResult> {
326 self.storage
327 .records
328 .iter()
329 .rev()
330 .find(|r| r.id == id)
331 .map(|r| SearchResult {
332 id: r.id.clone(),
333 score: 0.0,
334 metadata: r.metadata.clone(),
335 })
336 }
337}
338
339impl VecLite {
340 pub fn delete(&mut self, id: &str) -> Result<()> {
342 let removed = self.storage.delete(id)?;
343
344 if removed {
345 if let Some(ref config) = self.index.as_ref().map(|idx| idx.config.clone()) {
347 self.enable_hnsw(config.clone());
348 }
349 }
350
351 Ok(())
352 }
353
354 pub fn update(
356 &mut self,
357 id: &str,
358 vector: Vec<f32>,
359 metadata: Option<serde_json::Value>,
360 ) -> Result<()> {
361 self.delete(id)?;
362 self.insert(id, vector, metadata)?;
363 Ok(())
364 }
365
366 pub fn compact(&mut self) -> Result<()> {
368 Ok(())
370 }
371}