second_brain_core/
embedding.rs1use std::sync::Mutex;
2
3use anyhow::{Context, Result};
4use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
5
6pub struct Embedder {
7 model: Mutex<TextEmbedding>,
8}
9
/// Builds the text that is embedded for a search query.
///
/// The returned string is a fixed retrieval instruction followed directly by
/// `text`; `text` itself is not trimmed or otherwise modified.
pub fn query_prompt(text: &str) -> String {
    // Instruction prefix, including its trailing separator space.
    const INSTRUCTION: &str = "Represent this sentence for searching relevant passages: ";

    [INSTRUCTION, text].concat()
}
13
14impl Embedder {
15 pub fn new() -> Result<Self> {
16 let model = TextEmbedding::try_new(
17 InitOptions::new(EmbeddingModel::BGESmallENV15).with_show_download_progress(true),
18 )
19 .context("initializing embedding model")?;
20
21 Ok(Self {
22 model: Mutex::new(model),
23 })
24 }
25
26 pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
27 let mut model = self.model.lock().unwrap();
28 let results = model
29 .embed(vec![text], None)
30 .context("generating embedding")?;
31
32 results
33 .into_iter()
34 .next()
35 .ok_or_else(|| anyhow::anyhow!("no embedding returned"))
36 }
37
38 pub fn embed_query(&self, text: &str) -> Result<Vec<f32>> {
41 self.embed(&query_prompt(text))
42 }
43
44 pub fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
45 let mut model = self.model.lock().unwrap();
46 let owned: Vec<String> = texts.iter().map(|t| t.to_string()).collect();
47 let refs: Vec<&str> = owned.iter().map(|s| s.as_str()).collect();
48 model
49 .embed(refs, None)
50 .context("generating batch embeddings")
51 }
52
53 pub fn dimension(&self) -> usize {
54 384
55 }
56}