vectoria-core 0.1.3

Embedded hybrid search engine core — BM25 + vector + behavioral signals
use bm25::{Language, SearchEngine, SearchEngineBuilder};
use std::sync::Mutex;

/// Mutable state of a [`Bm25Index`], kept together so one lock guards both fields.
#[derive(Default)]
struct BM25Inner {
    /// Indexed documents as `(id, text)` pairs, in insertion order.
    ///
    /// `search` maps an engine hit back to its id by using the hit's numeric
    /// document id as a position in this vector.
    corpus: Vec<(String, String)>,
    /// Search engine built from the texts in `corpus`, or `None` when it has
    /// not been built yet or was discarded by `upsert`/`remove`.
    /// `search` builds it on demand.
    engine: Option<SearchEngine<u32>>,
}

/// BM25 text index over `(id, text)` documents.
///
/// All state lives behind a [`Mutex`], which is why every method takes `&self`.
/// The underlying search engine is built on the first `search` after the
/// corpus has been modified.
#[derive(Default)]
pub struct Bm25Index {
    /// Corpus and cached engine, guarded by a single lock.
    inner: Mutex<BM25Inner>,
}

impl Bm25Index {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn upsert(&self, id: &str, text: &str) {
        let mut inner = self.inner.lock().unwrap();
        if let Some(pos) = inner.corpus.iter().position(|(k, _)| k == id) {
            inner.corpus[pos].1 = text.to_string();
        } else {
            inner.corpus.push((id.to_string(), text.to_string()));
        }
        inner.engine = None;
    }

    pub fn remove(&self, id: &str) {
        let mut inner = self.inner.lock().unwrap();
        inner.corpus.retain(|(k, _)| k != id);
        inner.engine = None;
    }

    pub fn search(&self, query: &str, limit: usize) -> Vec<(String, f32)> {
        let mut inner = self.inner.lock().unwrap();
        if inner.corpus.is_empty() {
            return Vec::new();
        }
        if inner.engine.is_none() {
            let texts: Vec<&str> = inner.corpus.iter().map(|(_, t)| t.as_str()).collect();
            inner.engine = Some(SearchEngineBuilder::<u32>::with_corpus(Language::English, texts).build());
        }
        let engine = inner.engine.as_ref().unwrap();
        engine
            .search(query, limit)
            .into_iter()
            .filter_map(|r| {
                inner.corpus.get(r.document.id as usize).map(|(id, _)| (id.clone(), r.score))
            })
            .collect()
    }

    pub fn len(&self) -> usize {
        self.inner.lock().unwrap().corpus.len()
    }

    pub fn is_empty(&self) -> bool {
        self.inner.lock().unwrap().corpus.is_empty()
    }
}