ski/embed/fast.rs
1//! Real embeddings via fastembed (ONNX). Compiled with the `fastembed` feature,
2//! which is on by default; build `--no-default-features` to drop it for the offline
3//! bag-of-words lane. Default model: bge-small-en-v1.5; lite alt: all-MiniLM-L6-v2
4//! (quantized).
5
6use crate::embed::{EmbedKind, Embedder};
7use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
8
/// Instruction prepended to every query text before embedding with a
/// bge-family model. Only queries get it (see `FastEmbedder::embed`); other
/// texts are embedded as-is.
const BGE_QUERY_PREFIX: &str = "Represent this sentence for searching relevant passages: ";
10
11pub struct FastEmbedder {
12 model: TextEmbedding,
13 tag: String,
14 bge: bool,
15}
16
17/// The fastembed model (and whether it is a bge-family asymmetric model) for a
18/// recognized id, `None` otherwise. Split from [`FastEmbedder::try_for`] so
19/// recognition is answerable without constructing the embedder — construction
20/// downloads the model files on a cold cache.
21fn model_for(model: &str) -> Option<(EmbeddingModel, bool)> {
22 match model {
23 "bge-small-en-v1.5" => Some((EmbeddingModel::BGESmallENV15, true)),
24 "bge-base-en-v1.5" => Some((EmbeddingModel::BGEBaseENV15, true)),
25 "all-MiniLM-L6-v2-q" => Some((EmbeddingModel::AllMiniLML6V2Q, false)),
26 "all-MiniLM-L6-v2" => Some((EmbeddingModel::AllMiniLML6V2, false)),
27 _ => None,
28 }
29}
30
31impl FastEmbedder {
32 /// Whether `model` is a recognized fastembed model id (i.e. [`try_for`]
33 /// would return `Some`), with no side effects — no model download.
34 ///
35 /// [`try_for`]: Self::try_for
36 pub fn recognized(model: &str) -> bool {
37 model_for(model).is_some()
38 }
39
40 /// `Some` if `model` is a recognized fastembed model id, else `None` so the
41 /// caller can fall back to the bag-of-words embedder.
42 pub fn try_for(model: &str) -> anyhow::Result<Option<Self>> {
43 let Some((em, bge)) = model_for(model) else {
44 return Ok(None);
45 };
46 let te = TextEmbedding::try_new(
47 InitOptions::new(em).with_cache_dir(crate::paths::model_cache_dir()),
48 )?;
49 Ok(Some(Self {
50 model: te,
51 tag: model.to_string(),
52 bge,
53 }))
54 }
55}
56
57impl Embedder for FastEmbedder {
58 fn id(&self) -> String {
59 self.tag.clone()
60 }
61
62 fn embed(&self, texts: &[String], kind: EmbedKind) -> anyhow::Result<Vec<Vec<f32>>> {
63 let prepped: Vec<String> = if self.bge && kind == EmbedKind::Query {
64 texts
65 .iter()
66 .map(|t| format!("{BGE_QUERY_PREFIX}{t}"))
67 .collect()
68 } else {
69 texts.to_vec()
70 };
71 self.model.embed(prepped, None)
72 }
73
74 // Tuned by sweeping the anthropic/skills corpus (scoped + global) against the
75 // live installed skill set. bge is anisotropic: unrelated prompts still cosine
76 // ~0.50-0.62 and genuine matches sit ~0.66+, so the floor is set at the knee
77 // (0.64) — it rejects the noise tail while keeping real hits, trading one
78 // borderline positive for two fewer false injections. The lone residual leak
79 // is genuinely on-topic (a git skill on a git prompt). Margin 0.12 keeps only
80 // near-peers of the leader. MiniLM shares this family tuning until it gets its
81 // own corpus pass; it is an opt-in lite alternative.
82 fn min_similarity(&self) -> f32 {
83 0.64
84 }
85
86 fn score_margin(&self) -> f32 {
87 0.12
88 }
89}