Skip to main content

ski/embed/
fast.rs

//! Real embeddings via fastembed (ONNX). Compiled with the `fastembed` feature,
//! which is on by default; build with `--no-default-features` to drop it for the
//! offline bag-of-words lane. Default model: bge-small-en-v1.5; lite alt:
//! all-MiniLM-L6-v2 (quantized).
5
6use crate::embed::{EmbedKind, Embedder};
7use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
8
/// Instruction text prepended to every query before it is embedded with a
/// bge-family model. It is concatenated directly in front of the query text,
/// so the trailing space is significant.
const BGE_QUERY_PREFIX: &str = "Represent this sentence for searching relevant passages: ";
10
11pub struct FastEmbedder {
12    model: TextEmbedding,
13    tag: String,
14    bge: bool,
15}
16
17/// The fastembed model (and whether it is a bge-family asymmetric model) for a
18/// recognized id, `None` otherwise. Split from [`FastEmbedder::try_for`] so
19/// recognition is answerable without constructing the embedder — construction
20/// downloads the model files on a cold cache.
21fn model_for(model: &str) -> Option<(EmbeddingModel, bool)> {
22    match model {
23        "bge-small-en-v1.5" => Some((EmbeddingModel::BGESmallENV15, true)),
24        "bge-base-en-v1.5" => Some((EmbeddingModel::BGEBaseENV15, true)),
25        "all-MiniLM-L6-v2-q" => Some((EmbeddingModel::AllMiniLML6V2Q, false)),
26        "all-MiniLM-L6-v2" => Some((EmbeddingModel::AllMiniLML6V2, false)),
27        _ => None,
28    }
29}
30
31impl FastEmbedder {
32    /// Whether `model` is a recognized fastembed model id (i.e. [`try_for`]
33    /// would return `Some`), with no side effects — no model download.
34    ///
35    /// [`try_for`]: Self::try_for
36    pub fn recognized(model: &str) -> bool {
37        model_for(model).is_some()
38    }
39
40    /// `Some` if `model` is a recognized fastembed model id, else `None` so the
41    /// caller can fall back to the bag-of-words embedder.
42    pub fn try_for(model: &str) -> anyhow::Result<Option<Self>> {
43        let Some((em, bge)) = model_for(model) else {
44            return Ok(None);
45        };
46        let te = TextEmbedding::try_new(
47            InitOptions::new(em).with_cache_dir(crate::paths::model_cache_dir()),
48        )?;
49        Ok(Some(Self {
50            model: te,
51            tag: model.to_string(),
52            bge,
53        }))
54    }
55}
56
57impl Embedder for FastEmbedder {
58    fn id(&self) -> String {
59        self.tag.clone()
60    }
61
62    fn embed(&self, texts: &[String], kind: EmbedKind) -> anyhow::Result<Vec<Vec<f32>>> {
63        let prepped: Vec<String> = if self.bge && kind == EmbedKind::Query {
64            texts
65                .iter()
66                .map(|t| format!("{BGE_QUERY_PREFIX}{t}"))
67                .collect()
68        } else {
69            texts.to_vec()
70        };
71        self.model.embed(prepped, None)
72    }
73
74    // Tuned by sweeping the anthropic/skills corpus (scoped + global) against the
75    // live installed skill set. bge is anisotropic: unrelated prompts still cosine
76    // ~0.50-0.62 and genuine matches sit ~0.66+, so the floor is set at the knee
77    // (0.64) — it rejects the noise tail while keeping real hits, trading one
78    // borderline positive for two fewer false injections. The lone residual leak
79    // is genuinely on-topic (a git skill on a git prompt). Margin 0.12 keeps only
80    // near-peers of the leader. MiniLM shares this family tuning until it gets its
81    // own corpus pass; it is an opt-in lite alternative.
82    fn min_similarity(&self) -> f32 {
83        0.64
84    }
85
86    fn score_margin(&self) -> f32 {
87        0.12
88    }
89}