use anyhow::Result;
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
/// Embedding model variant handed to fastembed when [`Embedder::new`] builds the model.
const MODEL: EmbeddingModel = EmbeddingModel::AllMiniLML6V2;
/// Text embedder backed by a fastembed [`TextEmbedding`] model.
///
/// Build one with [`Embedder::new`], then call [`Embedder::embed`] or
/// [`Embedder::embed_batch`] to turn text into `f32` vectors.
pub struct Embedder {
// Loaded fastembed model; every embedding call is delegated to it.
model: TextEmbedding,
}
impl Embedder {
pub fn new() -> Result<Self> {
if !model_is_cached() {
let dir = std::env::var("FASTEMBED_CACHE_DIR")
.unwrap_or_else(|_| ".fastembed_cache".to_string());
eprintln!(
"[mcpkill] First run: downloading embedding model (~23 MB) to {dir} …"
);
}
let model =
TextEmbedding::try_new(InitOptions::new(MODEL).with_show_download_progress(true))?;
Ok(Self { model })
}
pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
let mut results = self.model.embed(vec![text], None)?;
Ok(results.remove(0))
}
pub fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
self.model.embed(texts.to_vec(), None)
}
}
/// Reports whether the embedding model's directory exists in the fastembed cache.
///
/// The cache root is read from the `FASTEMBED_CACHE_DIR` environment variable,
/// falling back to `.fastembed_cache` when the variable is unset or is not
/// valid Unicode. The function only checks that the
/// `models--Qdrant--all-MiniLM-L6-v2-onnx` entry exists under that root; it
/// does not inspect the entry's contents.
pub fn model_is_cached() -> bool {
    let cache_root = std::env::var("FASTEMBED_CACHE_DIR")
        .unwrap_or_else(|_| ".fastembed_cache".to_string());
    // `Path::join` uses the platform separator and keeps an empty root relative
    // to the working directory; string concatenation turned it into `/models--…`.
    std::path::Path::new(&cache_root)
        .join("models--Qdrant--all-MiniLM-L6-v2-onnx")
        .exists()
}