use std::sync::Arc;
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
use once_cell::sync::OnceCell;
use crate::error::{Error, Result};
use super::Embedder;
/// Lazily-initialised cell holding the single shared [`TextEmbedding`] instance.
///
/// The cell starts empty and is filled by `shared_model` on first use; later
/// calls hand out clones of the stored `Arc` instead of building a new model.
static MODEL: OnceCell<Arc<TextEmbedding>> = OnceCell::new();
fn shared_model() -> Result<Arc<TextEmbedding>> {
MODEL
.get_or_try_init(|| {
let model = TextEmbedding::try_new(
InitOptions::new(EmbeddingModel::BGESmallENV15).with_show_download_progress(true),
)
.map_err(|e| Error::Embed(e.to_string()))?;
Ok(Arc::new(model))
})
.map(Arc::clone)
}
/// [`Embedder`] implementation backed by a fastembed [`TextEmbedding`] model.
///
/// Cloning only clones the inner `Arc`, so all clones refer to the same
/// underlying model instance.
#[derive(Clone)]
pub struct BgeEmbedder {
/// Reference-counted handle to the model used for every embedding call.
model: Arc<TextEmbedding>,
}
impl BgeEmbedder {
    /// Creates an embedder that uses the shared model instance.
    ///
    /// # Errors
    ///
    /// Propagates the error from `shared_model` when the model cannot be
    /// obtained.
    pub fn new() -> Result<Self> {
        shared_model().map(|model| Self { model })
    }
}
impl Embedder for BgeEmbedder {
fn embed(&self, text: &str) -> Result<Vec<f32>> {
let mut out = self
.model
.embed(vec![text], None)
.map_err(|e| Error::Embed(e.to_string()))?;
Ok(out.remove(0))
}
fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
self.model
.embed(texts.to_vec(), None)
.map_err(|e| Error::Embed(e.to_string()))
}
fn dimensions(&self) -> usize {
384
}
}