Skip to main content

repograph_core/search/
embed.rs

1//! Local embedding backend.
2//!
3//! Compiled only with the `semantic` feature. Without it, this module exposes a
4//! single `create` that reports the feature is absent, so the caller degrades to
5//! lexical retrieval with a clear notice. With it, `create` initializes a
6//! `fastembed` model (downloaded once into the data-dir model cache) and wraps
7//! it as an [`Embedder`].
8
9use std::path::Path;
10
11use crate::search::index::Embedder;
12
/// Stable identifier of the embedding model, recorded next to each vector so a
/// model change invalidates the vector segment.
///
/// The `semantic` embedder returns this value from `model_id`, so keep it in
/// step with the `fastembed` model selected in `imp::FastEmbedder::new`.
pub const MODEL_ID: &str = "bge-small-en-v1.5";
16
/// Build the local embedding backend, boxed behind the [`Embedder`] trait.
///
/// This is the variant compiled with the `semantic` feature: it initializes the
/// `fastembed`-backed embedder, passing `model_cache_dir` as its model cache
/// directory. Builds without the feature get a sibling `create` that always
/// fails, so callers can fall back to lexical retrieval in either case.
///
/// # Errors
///
/// Returns a human-readable message describing why semantic retrieval is
/// unavailable, i.e. the stringified model download/initialization failure.
#[cfg(feature = "semantic")]
pub fn create(model_cache_dir: &Path) -> Result<Box<dyn Embedder>, String> {
    let embedder = imp::FastEmbedder::new(model_cache_dir)?;
    // The concrete embedder coerces to the trait object at the return site.
    Ok(Box::new(embedder))
}
32
33/// Stub when the `semantic` feature is disabled — always reports unavailability.
34///
35/// # Errors
36///
37/// Always returns a notice that the build lacks semantic support.
38#[cfg(not(feature = "semantic"))]
39pub fn create(_model_cache_dir: &Path) -> Result<Box<dyn Embedder>, String> {
40    Err(
41        "built without semantic support — rebuild with `--features semantic` for embeddings"
42            .to_string(),
43    )
44}
45
/// `fastembed`-backed implementation, compiled only with the `semantic` feature.
#[cfg(feature = "semantic")]
mod imp {
    use std::path::Path;

    use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};

    use super::MODEL_ID;
    use crate::search::index::Embedder;

    /// [`Embedder`] that delegates to a `fastembed` text-embedding model.
    pub struct FastEmbedder {
        model: TextEmbedding,
    }

    impl FastEmbedder {
        /// Initialize the `EmbeddingModel::BGESmallENV15` model.
        ///
        /// `model_cache_dir` is handed to `fastembed` as its cache directory,
        /// and download progress output is turned off.
        ///
        /// # Errors
        ///
        /// Returns the `fastembed` initialization error rendered as a string.
        pub fn new(model_cache_dir: &Path) -> Result<Self, String> {
            let cache_dir = model_cache_dir.to_path_buf();
            let init = InitOptions::new(EmbeddingModel::BGESmallENV15)
                .with_cache_dir(cache_dir)
                .with_show_download_progress(false);

            match TextEmbedding::try_new(init) {
                Ok(model) => Ok(Self { model }),
                Err(err) => Err(err.to_string()),
            }
        }
    }

    impl Embedder for FastEmbedder {
        /// Identifier stored alongside vectors produced by this embedder.
        fn model_id(&self) -> &str {
            MODEL_ID
        }

        /// Embed every string in `texts`, returning the vectors the model
        /// produces for them.
        ///
        /// # Errors
        ///
        /// Returns the `fastembed` error rendered as a string.
        fn embed(&mut self, texts: &[String]) -> Result<Vec<Vec<f32>>, String> {
            // The model is handed borrowed string slices rather than cloned `String`s.
            let inputs = texts.iter().map(String::as_str).collect::<Vec<&str>>();
            // Second argument is `fastembed`'s optional batch size; `None`
            // keeps the library default.
            let embedded = self.model.embed(inputs, None);
            embedded.map_err(|err| err.to_string())
        }
    }
}