veclite-core 0.1.0

Core query and execution engine for VecLite
Documentation
use rayon::prelude::*;
use serde::Serialize;
use std::path::Path;
use thiserror::Error;

pub use veclite_index::{
    CosineMetric, DotMetric, EuclideanMetric, ManhattanMetric, Metric, SimilarityMetric,
};
pub use veclite_storage::{Record, Storage, StorageError};

/// Errors returned by the VecLite core API.
///
/// Both variants wrap an underlying error and are convertible from it via
/// `From`, so `?` can be used directly on storage and `serde_json` results.
#[derive(Error, Debug)]
pub enum VecLiteError {
    /// A failure reported by the storage layer.
    #[error("Storage error: {0}")]
    Storage(#[from] StorageError),
    /// A failure reported by `serde_json`.
    #[error("Serialization error: {0}")]
    Serde(#[from] serde_json::Error),
}

/// Convenience alias for results whose error type is [`VecLiteError`].
pub type Result<T> = std::result::Result<T, VecLiteError>;

/// A single hit produced by a search.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Identifier of the matching record.
    pub id: String,
    /// Value computed by the database's metric for the query and the record's
    /// vector, adjusted by time decay when the search configured it.
    /// Whether larger or smaller is better depends on the metric.
    pub score: f32,
    /// Copy of the record's metadata, if it has any.
    pub metadata: Option<serde_json::Value>,
}

/// Builder that accumulates the options of a search over a [`VecLite`]
/// database before running it.
pub struct SearchBuilder<'a> {
    /// Database the search runs against.
    db: &'a VecLite,
    /// Query vector compared with every stored record.
    query: Vec<f32>,
    /// Maximum number of results to return.
    k: usize,
    /// Metadata `(key, value)` pairs a record must all match to be considered.
    filters: Vec<(&'a str, serde_json::Value)>,
    /// Base of the time-decay weight; `None` disables decay.
    decay_factor: Option<f32>,
    /// Reference time used to compute a record's age; `None` disables decay.
    current_time: Option<u64>,
}

impl<'a> SearchBuilder<'a> {
    /// Creates a builder for `query` against `db` with the defaults:
    /// at most 5 results, no metadata filters and no time decay.
    pub fn new(db: &'a VecLite, query: Vec<f32>) -> Self {
        Self {
            db,
            query,
            k: 5,
            filters: Vec::new(),
            decay_factor: None,
            current_time: None,
        }
    }

    /// Sets the maximum number of results returned by [`execute`](Self::execute).
    pub fn top_k(mut self, k: usize) -> Self {
        self.k = k;
        self
    }

    /// Adds a metadata filter: only records whose metadata holds `key` with a
    /// JSON value equal to `value` are considered.
    ///
    /// Filters are cumulative; a record must satisfy all of them, and a record
    /// without metadata never matches once any filter is present.
    ///
    /// If `value` cannot be converted to a JSON value the filter is dropped
    /// and the builder is returned unchanged (best effort: the signature has
    /// no way to report the failure).
    pub fn filter<V: Serialize>(mut self, key: &'a str, value: V) -> Self {
        if let Ok(v) = serde_json::to_value(value) {
            self.filters.push((key, v));
        }
        self
    }

    /// Enables time decay.
    ///
    /// For a record whose timestamp is older than `current_time`, the score is
    /// weighted by `factor.powf(age / 86400.0)`, where `age` is the difference
    /// between `current_time` and the record's timestamp (so the exponent is
    /// in days if timestamps are seconds; that unit is assumed, not checked).
    /// Records without a timestamp, or not older than `current_time`, are left
    /// untouched.
    pub fn time_decay(mut self, factor: f32, current_time: u64) -> Self {
        self.decay_factor = Some(factor);
        self.current_time = Some(current_time);
        self
    }

    /// Runs the search and returns at most `k` results, best first.
    ///
    /// Every stored record passing the metadata filters is scored with the
    /// database's metric. Cosine and dot-product scores are ranked descending,
    /// Euclidean and Manhattan scores ascending. Results whose score is NaN
    /// are ranked last.
    ///
    /// # Errors
    ///
    /// The current implementation has no failing path and always returns `Ok`.
    pub fn execute(self) -> Result<Vec<SearchResult>> {
        let higher_better = match self.db.metric {
            Metric::Cosine | Metric::DotProduct => true,
            Metric::Euclidean | Metric::Manhattan => false,
        };

        // Bind exactly what the parallel closures need so they capture these
        // narrow borrows instead of the whole builder.
        let filters = self.filters.as_slice();
        let query = &self.query;
        let metric = &self.db.metric;
        let decay = self.decay_factor.zip(self.current_time);

        let mut results: Vec<SearchResult> = self
            .db
            .storage
            .records
            .par_iter()
            .filter(|r| Self::matches_filters(filters, r.metadata.as_ref()))
            .map(|r| {
                let mut score = match metric {
                    Metric::Cosine => CosineMetric::distance(query, &r.vector),
                    Metric::DotProduct => DotMetric::distance(query, &r.vector),
                    Metric::Euclidean => EuclideanMetric::distance(query, &r.vector),
                    Metric::Manhattan => ManhattanMetric::distance(query, &r.vector),
                };

                if let (Some((factor, now)), Some(ts)) = (decay, r.timestamp) {
                    if now > ts {
                        let age_days = (now - ts) as f32 / 86400.0;
                        let weight = factor.powf(age_days);
                        // The weight must move the score in the "worse"
                        // direction for the metric in use: similarities are
                        // scaled down, distances are scaled up. Multiplying a
                        // distance by a weight below 1 would shrink it and
                        // make older records rank better.
                        // NOTE(review): a negative similarity multiplied by a
                        // weight below 1 still moves toward zero (i.e. up).
                        score = if higher_better {
                            score * weight
                        } else {
                            score / weight
                        };
                    }
                }

                SearchResult {
                    id: r.id.clone(),
                    score,
                    metadata: r.metadata.clone(),
                }
            })
            .collect();

        results.sort_by(|a, b| Self::compare_scores(a.score, b.score, higher_better));
        results.truncate(self.k);
        Ok(results)
    }

    /// Returns `true` when there are no filters, or when `metadata` is present
    /// and holds every filter key with an equal value.
    fn matches_filters(
        filters: &[(&str, serde_json::Value)],
        metadata: Option<&serde_json::Value>,
    ) -> bool {
        filters.is_empty()
            || metadata.map_or(false, |meta| {
                filters.iter().all(|(key, expected)| meta.get(*key) == Some(expected))
            })
    }

    /// Orders two scores best-first for the given ranking direction.
    ///
    /// NaN scores compare equal to each other and after every other score, so
    /// the comparator stays a total order (which `sort_by` requires) even when
    /// a metric or the decay weight produces NaN.
    fn compare_scores(a: f32, b: f32, higher_better: bool) -> std::cmp::Ordering {
        match (a.is_nan(), b.is_nan()) {
            (true, true) => std::cmp::Ordering::Equal,
            (true, false) => std::cmp::Ordering::Greater,
            (false, true) => std::cmp::Ordering::Less,
            (false, false) => {
                // Neither side is NaN, so `partial_cmp` always yields `Some`.
                let ordering = if higher_better {
                    b.partial_cmp(&a)
                } else {
                    a.partial_cmp(&b)
                };
                ordering.unwrap_or(std::cmp::Ordering::Equal)
            }
        }
    }
}

/// A vector database handle: a storage backend paired with the metric used to
/// score records during search.
pub struct VecLite {
    /// Backend holding the stored records.
    pub storage: Storage,
    /// Metric applied when comparing a query with stored vectors.
    pub metric: Metric,
}

impl VecLite {
    /// Open database
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        Self::open_with_metric(path, Metric::Cosine)
    }

    /// Open database with specific metric
    pub fn open_with_metric<P: AsRef<Path>>(path: P, metric: Metric) -> Result<Self> {
        let storage = Storage::open(path)?;
        Ok(Self { storage, metric })
    }

    /// Insert single vector
    pub fn insert(
        &mut self,
        id: &str,
        vector: Vec<f32>,
        metadata: Option<serde_json::Value>,
    ) -> Result<()> {
        self.insert_with_time(id, vector, metadata, None)
    }

    /// Insert vector with time
    pub fn insert_with_time(
        &mut self,
        id: &str,
        vector: Vec<f32>,
        metadata: Option<serde_json::Value>,
        timestamp: Option<u64>,
    ) -> Result<()> {
        let record = Record {
            id: id.to_string(),
            vector,
            metadata,
            timestamp,
        };
        self.storage.append(record)?;
        Ok(())
    }

    /// Insert multiple vectors
    pub fn insert_batch(
        &mut self,
        records: Vec<(&str, Vec<f32>, Option<serde_json::Value>)>,
    ) -> Result<()> {
        let mut recs = Vec::new();
        for (id, vector, metadata) in records {
            recs.push(Record {
                id: id.to_string(),
                vector,
                metadata,
                timestamp: None,
            });
        }
        self.storage.append_batch(recs)?;
        Ok(())
    }

    /// Simple search
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
        SearchBuilder::new(self, query.to_vec()).top_k(k).execute()
    }

    /// Search with filters and decay
    pub fn build_search(&self, query: &[f32]) -> SearchBuilder<'_> {
        SearchBuilder::new(self, query.to_vec())
    }

    /// Database statistics
    pub fn stats(&self) -> Result<(usize, usize)> {
        let stats = self.storage.stats()?;
        Ok(stats)
    }
}