use std::path::Path;
use std::sync::Mutex;
use anyhow::{Context, Result};
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
use super::{Embedder, EmbedderKind};
/// Embedding width reported by `NeuralEmbedder::dim` for the all-MiniLM-L6-v2
/// model that `NeuralEmbedder::new` loads.
const ALL_MINILM_L6_V2_DIM: usize = 384;
/// Embedder backed by a fastembed `TextEmbedding` model.
pub struct NeuralEmbedder {
    /// The loaded model, kept behind a `Mutex` and locked by `embed_batch`
    /// for each call, which only has `&self` to work with.
    model: Mutex<TextEmbedding>,
}
impl NeuralEmbedder {
    /// Builds an embedder around the fastembed all-MiniLM-L6-v2 model.
    ///
    /// When `cache_dir` is `Some`, it replaces the cache directory in the
    /// fastembed init options; with `None` the options keep whatever
    /// `InitOptions::new` set. Download progress display is always turned off.
    ///
    /// # Errors
    ///
    /// Returns an error, with added context, if `TextEmbedding::try_new` fails.
    pub fn new(cache_dir: Option<&Path>) -> Result<Self> {
        let mut options = InitOptions::new(EmbeddingModel::AllMiniLML6V2);
        options.show_download_progress = false;
        if let Some(custom_dir) = cache_dir {
            options.cache_dir = custom_dir.to_path_buf();
        }

        TextEmbedding::try_new(options)
            .context("failed to initialize fastembed TextEmbedding (all-MiniLM-L6-v2)")
            .map(|text_embedding| Self {
                model: Mutex::new(text_embedding),
            })
    }
}
impl Embedder for NeuralEmbedder {
    /// Identifies this embedder as the neural variant.
    fn kind(&self) -> EmbedderKind {
        EmbedderKind::Neural
    }

    /// Embeds every string in `texts`, returning one vector per input as
    /// produced by the underlying fastembed model.
    ///
    /// An empty `texts` slice returns an empty `Vec` without locking the model.
    ///
    /// # Errors
    ///
    /// Returns an error if the model mutex is poisoned or if fastembed's
    /// `embed` call fails.
    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
        if texts.is_empty() {
            return Ok(Vec::new());
        }

        // Copy the borrowed strs into a Vec before taking the lock; the Vec is
        // what gets handed to fastembed.
        let inputs: Vec<&str> = texts.to_vec();

        let mut model = match self.model.lock() {
            Ok(locked) => locked,
            Err(e) => return Err(anyhow::anyhow!("fastembed model mutex poisoned: {e}")),
        };

        model
            .embed(inputs, None)
            .context("fastembed embed() failed")
    }

    /// Width of the vectors this embedder reports: `ALL_MINILM_L6_V2_DIM`.
    fn dim(&self) -> usize {
        ALL_MINILM_L6_V2_DIM
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Cache directory used by these tests: `.fastembed_cache` under the
    /// crate's manifest directory.
    fn workspace_cache_dir() -> std::path::PathBuf {
        let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
        manifest_dir.join(".fastembed_cache")
    }

    /// Euclidean length of `values`.
    fn l2_norm(values: &[f32]) -> f32 {
        values.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// Loads the model and checks kind, dimension, and unit-length output.
    /// If the model cannot be constructed, the test prints a note and passes.
    #[test]
    fn neural_embedder_loads_or_skips() {
        let cache = workspace_cache_dir();
        let embedder = match NeuralEmbedder::new(Some(&cache)) {
            Ok(loaded) => loaded,
            Err(e) => {
                println!("neural embedder skipped (model not available): {e:#}");
                return;
            }
        };

        assert_eq!(embedder.kind(), EmbedderKind::Neural);
        assert_eq!(embedder.dim(), 384);

        let inputs = ["hello world", "fn authenticate(user: User)"];
        let vecs = embedder
            .embed_batch(&inputs)
            .expect("embed_batch failed on loaded model");
        assert_eq!(vecs.len(), 2);

        for v in &vecs {
            assert_eq!(v.len(), 384);
            let norm = l2_norm(v);
            assert!(
                (norm - 1.0).abs() < 0.01,
                "fastembed vector not normalized: norm={norm}"
            );
        }
        println!("neural embedder: OK (384d, normalized)");
    }

    /// An empty batch must yield an empty result. If the model cannot be
    /// constructed, the test prints a note and passes.
    #[test]
    fn neural_embedder_empty_batch_is_ok() {
        let cache = workspace_cache_dir();
        match NeuralEmbedder::new(Some(&cache)) {
            Ok(embedder) => {
                let vecs = embedder.embed_batch(&[]).unwrap();
                assert!(vecs.is_empty());
            }
            Err(_) => {
                println!("neural_embedder_empty_batch_is_ok skipped (model not available)");
            }
        }
    }
}