repograph_core/search/
embed.rs1use std::path::Path;
10
11use crate::search::index::Embedder;
12
/// Model identifier string returned by `Embedder::model_id` for the embedder
/// defined in this file.
// NOTE(review): presumably names the same model as fastembed's
// `EmbeddingModel::BGESmallENV15` selected in `imp::FastEmbedder::new` — confirm.
13pub const MODEL_ID: &str = "bge-small-en-v1.5";
16
/// Builds the fastembed-backed [`Embedder`] (compiled only with the `semantic` feature).
///
/// `model_cache_dir` is forwarded unchanged to `imp::FastEmbedder::new`.
///
/// # Errors
///
/// Returns the error string produced by `imp::FastEmbedder::new` when the
/// embedder cannot be constructed.
#[cfg(feature = "semantic")]
pub fn create(model_cache_dir: &Path) -> Result<Box<dyn Embedder>, String> {
    let embedder = imp::FastEmbedder::new(model_cache_dir)?;
    // Erase the concrete type so callers only see the trait object.
    let boxed: Box<dyn Embedder> = Box::new(embedder);
    Ok(boxed)
}
32
33#[cfg(not(feature = "semantic"))]
39pub fn create(_model_cache_dir: &Path) -> Result<Box<dyn Embedder>, String> {
40 Err(
41 "built without semantic support — rebuild with `--features semantic` for embeddings"
42 .to_string(),
43 )
44}
45
/// fastembed-backed implementation of [`Embedder`]; compiled only with the
/// `semantic` feature.
#[cfg(feature = "semantic")]
mod imp {
    use std::path::Path;

    use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};

    use super::MODEL_ID;
    use crate::search::index::Embedder;

    /// Wrapper around a fastembed [`TextEmbedding`] model.
    pub struct FastEmbedder {
        model: TextEmbedding,
    }

    impl FastEmbedder {
        /// Initializes the `BGESmallENV15` model with `model_cache_dir` as the
        /// fastembed cache directory and download-progress display turned off.
        ///
        /// # Errors
        ///
        /// Returns the stringified fastembed error if `TextEmbedding::try_new` fails.
        pub fn new(model_cache_dir: &Path) -> Result<Self, String> {
            let cache_dir = model_cache_dir.to_path_buf();
            let init = InitOptions::new(EmbeddingModel::BGESmallENV15)
                .with_cache_dir(cache_dir)
                .with_show_download_progress(false);

            match TextEmbedding::try_new(init) {
                Ok(model) => Ok(Self { model }),
                Err(err) => Err(err.to_string()),
            }
        }
    }

    impl Embedder for FastEmbedder {
        /// Returns the file-level [`MODEL_ID`] constant.
        fn model_id(&self) -> &str {
            MODEL_ID
        }

        /// Embeds `texts` with the wrapped model, passing `None` as the second
        /// argument of `TextEmbedding::embed`.
        ///
        /// Any fastembed error is returned as its string form.
        fn embed(&mut self, texts: &[String]) -> Result<Vec<Vec<f32>>, String> {
            // Borrow each input as `&str` rather than cloning the strings.
            let inputs: Vec<&str> = texts.iter().map(|text| text.as_str()).collect();

            match self.model.embed(inputs, None) {
                Ok(vectors) => Ok(vectors),
                Err(err) => Err(err.to_string()),
            }
        }
    }
}