lunaris_embed/fastembed.rs
1//! `FastembedEmbedder` — ONNX-backed EmbeddingGemma 300M via fastembed-rs.
2//!
3//! ## v0 forward-pass strategy
4//!
5//! Where [`crate::candle_gemma::CandleEmbeddingGemma`] reads the
6//! `embed_tokens.weight` matrix and mean-pools the first-layer token
7//! embeddings (a pragmatic "lexical" shortcut), this backend runs the
8//! **full ONNX forward pass** of `EmbeddingGemma300M` via `fastembed::TextEmbedding`
9//! (which sits on top of `ort` 2.x — the ONNX Runtime Rust binding). The graph
10//! emits `sentence_embedding` already mean-pooled inside the ONNX model; we
11//! defensively L2-normalise on the way out because the graph is NOT guaranteed
12//! to emit unit vectors across all model variants, and Moon `FT.SEARCH` cosine
13//! distance requires unit-norm rows.
14//!
15//! Weights auto-download on first call to `FastembedEmbedder::new` via
16//! `hf-hub` (TLS-enforced — `hf-hub-native-tls` feature). Cache directory
17//! defaults to `~/.cache/lunaris/models/fastembed/` (shares parent with the
18//! candle path's `~/.cache/lunaris/models/embedding-gemma-300m/`, so
19//! `rm -rf ~/.cache/lunaris/models/` wipes both backends in one go).
20//!
21//! ## `&mut self` -> `&self` adapter
22//!
23//! `fastembed::TextEmbedding::embed` is `&mut self` and synchronous (CPU-bound
24//! ORT call). The [`Embedder`] trait is `&self` and async. We bridge with
25//! `Arc<Inner { Mutex<TextEmbedding> }>`:
26//! - The Mutex is `parking_lot::Mutex` (CLAUDE.md lock discipline — never
27//! `std::sync::Mutex` for new code).
28//! - The lock is acquired **inside** `tokio::task::spawn_blocking`, never held
29//! across `.await` (CLAUDE.md: "never hold a lock across `.await`").
30//! - This serializes concurrent `embed_batch` calls per `FastembedEmbedder`
31//! instance. That's fine: fastembed batches internally at `batch_size = 256`
32//! and Lunaris ingest is single-writer-per-scope, so the Mutex never
33//! meaningfully contends. Concurrent readers wanting parallelism construct
34//! multiple `FastembedEmbedder` instances (one ORT session per instance).
35//!
36//! ## Defensive L2-normalize
37//!
38//! Each output row is L2-normalised on the host side, matching the candle
39//! path's invariant (and the trait-level expectation). If `l2 < f64::EPSILON`
40//! (a degenerate all-zeros graph output for an empty/pad-only input) we return
41//! the row unchanged — same behaviour as `candle_gemma.rs`.
42//!
43//! ## Failure modes
44//!
45//! | Condition | Returned error |
46//! |--------------------------------------------------------|-------------------------------------------------------------------------------------|
47//! | HF Hub download failure (no network, 4xx, TLS) | `LunarisError::Storage(StorageError::Backend("fastembed: ..."))` (anyhow rewrap) |
48//! | ORT session init failure (corrupt cache, bad ONNX) | `LunarisError::Storage(StorageError::Backend("fastembed: ..."))` |
49//! | `TextEmbedding::embed` call failure (tokenizer, ORT) | `LunarisError::Storage(StorageError::Backend("fastembed: ..."))` |
50//! | `tokio::task::spawn_blocking` join failure (panic) | `LunarisError::Storage(StorageError::Backend("fastembed join: ..."))` |
51//! | First-call row width ≠ [`FASTEMBED_GEMMA_DIM`] | `LunarisError::Storage(StorageError::Backend("fastembed: dim mismatch ..."))` |
52//! | Mutex poisoned | Cannot occur — `parking_lot::Mutex` is poison-free by design. |
53
54use std::path::PathBuf;
55use std::sync::Arc;
56
57use async_trait::async_trait;
58use fastembed::{
59 EmbeddingModel, InitOptions, InitOptionsUserDefined, Pooling, QuantizationMode, TextEmbedding,
60 TokenizerFiles, UserDefinedEmbeddingModel,
61};
62use lunaris_core::{Embedder, LunarisError, StorageError};
63use parking_lot::Mutex;
64
/// Width, in `f32` components, of every vector the default
/// `EmbeddingGemma300M` path reports through [`Embedder::dim`]: 768.
///
/// Kept equal to [`crate::candle_gemma::EMBEDDING_GEMMA_DIM`] so that either
/// backend can stand in for the other behind the `Embedder` trait.
pub const FASTEMBED_GEMMA_DIM: usize = 768;
69
/// Token budget per input text (the EmbeddingGemma context window).
///
/// Same value as [`crate::candle_gemma::EMBEDDING_GEMMA_MAX_TOKENS`]. Unlike
/// the candle path, this backend performs no host-side truncation — that is
/// left to fastembed's tokenizer wrapper. The constant also serves as the
/// default `max_length` of [`FastembedUserDefinedOpts`].
pub const FASTEMBED_GEMMA_MAX_TOKENS: usize = 2048;
75
/// Name of the environment variable that, when set to a non-empty value,
/// replaces the default fastembed cache directory.
///
/// Follows the same env-override convention as `LUNARIS_OLLAMA_URL` /
/// `LUNARIS_OLLAMA_MODEL` in the feature-gated `crate::ollama` module.
pub const FASTEMBED_CACHE_DIR_ENV: &str = "LUNARIS_FASTEMBED_CACHE_DIR";
80
81// Phase 20 Plan 20-01 — execution-provider plumbing lives in a sibling module
82// to keep this file under the project's split threshold. Re-exported so the
83// public API surface (`lunaris_embed::fastembed::ExecutionPreference`) stays
84// unchanged for downstream callers.
85pub use crate::fastembed_exec::{
86 ExecutionPreference, FASTEMBED_EXECUTION_ENV, execution_from_env, parse_execution,
87};
88use crate::fastembed_exec::{build_execution_providers, requests_accelerator};
89
90/// Construction options for [`FastembedEmbedder`].
91///
92/// `Default` resolves `cache_dir` in priority order:
93/// 1. `$LUNARIS_FASTEMBED_CACHE_DIR` if set (operator-controllable for CI / sandboxes);
94/// 2. `~/.cache/lunaris/models/fastembed/` (shares parent with the candle cache);
95/// 3. `./lunaris/models/fastembed/` as a last-ditch fallback when `dirs::cache_dir`
96/// returns `None` (rare — only on platforms without a HOME concept).
97///
98/// `show_download_progress` defaults to `false` so server processes don't
99/// spew progress bars into structured logs. Set `true` for local CLI use.
100#[derive(Clone, Debug)]
101pub struct FastembedEmbedderOpts {
102 /// Filesystem path where fastembed stores auto-downloaded ONNX weights.
103 /// `None` means "resolve via the env-override → `dirs::cache_dir()` chain
104 /// at `Default` time"; once `Default` runs this is always `Some(...)`.
105 pub cache_dir: Option<PathBuf>,
106 /// Forwarded to `fastembed::InitOptions::with_show_download_progress`.
107 /// Default `false` to keep server logs clean.
108 pub show_download_progress: bool,
109 /// ORT execution-provider preference (Phase 20 Plan 20-01). `Default`
110 /// reads `$LUNARIS_FASTEMBED_EXECUTION`; unknown values resolve to `Cpu`
111 /// with a `tracing::warn`. Set programmatically when callers want to
112 /// override the environment.
113 pub execution: ExecutionPreference,
114}
115
116impl Default for FastembedEmbedderOpts {
117 fn default() -> Self {
118 Self {
119 cache_dir: Some(resolve_default_cache_dir()),
120 show_download_progress: false,
121 execution: execution_from_env(),
122 }
123 }
124}
125
126/// Resolve the default fastembed cache directory. See
127/// [`FastembedEmbedderOpts`] doc for the precedence chain.
128fn resolve_default_cache_dir() -> PathBuf {
129 if let Ok(env_dir) = std::env::var(FASTEMBED_CACHE_DIR_ENV)
130 && !env_dir.is_empty()
131 {
132 return PathBuf::from(env_dir);
133 }
134 let cache_root = dirs::cache_dir().unwrap_or_else(|| PathBuf::from("."));
135 cache_root.join("lunaris").join("models").join("fastembed")
136}
137
138/// ONNX-backed `EmbeddingGemma 300M` embedder. See module-level doc for the
139/// adapter strategy and failure-mode table.
140#[derive(Clone)]
141pub struct FastembedEmbedder {
142 /// Cheap-to-clone handle; the heavy ORT session lives inside the `Arc`.
143 inner: Arc<Inner>,
144}
145
146impl std::fmt::Debug for FastembedEmbedder {
147 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148 f.debug_struct("FastembedEmbedder")
149 .field("dim", &self.inner.dim)
150 .field("cache_dir", &self.inner.cache_dir)
151 .finish()
152 }
153}
154
155struct Inner {
156 /// The ONNX session. `embed` is `&mut self` so the lock IS the
157 /// serialisation point — see module doc.
158 model: Mutex<TextEmbedding>,
159 /// Retained for `Debug` + future operator triage tracing. Not used in the
160 /// hot path. For the user-defined-model path this is `PathBuf::new()`
161 /// (empty) since the operator hands us bytes directly — there is no
162 /// on-disk cache by definition.
163 cache_dir: PathBuf,
164 /// Embedding dimensionality. For the default path this is
165 /// [`FASTEMBED_GEMMA_DIM`]; for the user-defined path it is the
166 /// operator-declared `dim` from [`FastembedUserDefinedOpts`].
167 ///
168 /// Made runtime (rather than the compile-time constant the Phase 19
169 /// implementation read) by Plan 20-01 Task 3 so bring-your-own-model
170 /// callers see their own dim through the [`Embedder`] trait surface.
171 dim: usize,
172}
173
174impl FastembedEmbedder {
175 /// Construct a real ONNX-backed embedder. On first call this triggers an
176 /// HF Hub download of the EmbeddingGemma 300M weights (~600 MB) into
177 /// `opts.cache_dir`; subsequent calls hit the cache.
178 ///
179 /// Construction is **synchronous** because fastembed's `try_new` is
180 /// itself synchronous — the I/O happens inline. Callers that need to
181 /// avoid stalling the runtime should wrap this in
182 /// `tokio::task::spawn_blocking` at the call site; we deliberately do
183 /// NOT wrap inside `new` so the error mapping stays straightforward and
184 /// the caller controls the spawn context.
185 pub fn new(opts: FastembedEmbedderOpts) -> Result<Self, LunarisError> {
186 let cache_dir = opts.cache_dir.unwrap_or_else(resolve_default_cache_dir);
187 let execution = opts.execution.clone();
188
189 // T-19-01-03 mitigation: log model + cache_dir at INFO so operators can
190 // diff env-to-env. Do NOT log inputs anywhere in this module
191 // (T-19-01-04).
192 tracing::info!(
193 backend = "fastembed",
194 model = "EmbeddingGemma300M",
195 cache_dir = %cache_dir.display(),
196 execution = ?execution,
197 "fastembed embedder constructing"
198 );
199
200 let build = |providers_enabled: bool| -> Result<TextEmbedding, anyhow::Error> {
201 let mut init = InitOptions::new(EmbeddingModel::EmbeddingGemma300M)
202 .with_cache_dir(cache_dir.clone())
203 .with_show_download_progress(opts.show_download_progress);
204 if providers_enabled {
205 init = init.with_execution_providers(build_execution_providers(&execution));
206 }
207 TextEmbedding::try_new(init)
208 };
209
210 let model = try_with_fallback(&execution, build)?;
211
212 // Best-effort label: fastembed's `Session` doesn't expose the active
213 // EP, so we report the requested preference here. The fallback path
214 // emits its own `warn` if it kicked in, which is the durable signal
215 // for "you asked for accelerator but got CPU".
216 let resolved = execution.clone();
217 tracing::info!(
218 backend = "fastembed",
219 model = "EmbeddingGemma300M",
220 execution = ?resolved,
221 "fastembed embedder initialized"
222 );
223
224 Ok(Self {
225 inner: Arc::new(Inner {
226 model: Mutex::new(model),
227 cache_dir,
228 dim: FASTEMBED_GEMMA_DIM,
229 }),
230 })
231 }
232
233 /// Bring-your-own ONNX model (Phase 20 Plan 20-01). The operator supplies
234 /// the model bytes + tokenizer bytes in [`FastembedUserDefinedOpts`] and
235 /// declares the output dimensionality (`dim`); the constructor wires
236 /// fastembed's [`UserDefinedEmbeddingModel`] / `InitOptionsUserDefined`
237 /// and returns a ready embedder.
238 ///
239 /// # Trust requirement
240 ///
241 /// The ONNX bytes execute in-process through ONNX Runtime. They MUST come
242 /// from a trusted source (operator-controlled model registry, not
243 /// user-uploaded content) — Lunaris performs no graph validation. See
244 /// `.planning/phases/20-fastembed-adoption/20-01-PLAN.md` threat
245 /// `T-20-01-01`.
246 ///
247 /// # Storage-dim constraint
248 ///
249 /// Lunaris's default storage schema is **768-d** (Moon FT index + Postgres
250 /// `vector(768)` column). Operators bringing a model whose `dim != 768`
251 /// MUST also reindex storage — this is the storage-side migration covered
252 /// by Plan 20-03. Lunaris does NOT enforce dim parity between embedder
253 /// and storage on the hot path; a mismatch surfaces as a backend insert
254 /// error at first ingest.
255 ///
256 /// # Example
257 ///
258 /// ```no_run
259 /// use std::sync::Arc;
260 /// use lunaris_embed::fastembed::{
261 /// FastembedEmbedder, FastembedUserDefinedOpts, PoolingMode, ExecutionPreference,
262 /// };
263 ///
264 /// # fn demo() -> Result<(), Box<dyn std::error::Error>> {
265 /// let onnx = std::fs::read("models/helios-finetuned.onnx")?;
266 /// let tok = std::fs::read("models/helios-finetuned/tokenizer.json")?;
267 /// let embedder = FastembedEmbedder::from_user_defined(FastembedUserDefinedOpts {
268 /// onnx_file: onnx,
269 /// tokenizer_file: tok,
270 /// tokenizer_config_file: None,
271 /// special_tokens_map_file: None,
272 /// config_file: None,
273 /// dim: 1024, // MUST match the ONNX model's output dim
274 /// pooling: PoolingMode::Mean,
275 /// execution: ExecutionPreference::Cpu,
276 /// max_length: 2048,
277 /// })?;
278 /// // let lunaris = Lunaris::open(url).await?.with_embedder(Arc::new(embedder));
279 /// let _ = Arc::new(embedder);
280 /// # Ok(()) }
281 /// ```
282 pub fn from_user_defined(opts: FastembedUserDefinedOpts) -> Result<Self, LunarisError> {
283 if opts.onnx_file.is_empty() {
284 return Err(LunarisError::Storage(StorageError::Backend(
285 "fastembed: from_user_defined called with empty onnx_file bytes".to_string(),
286 )));
287 }
288 if opts.tokenizer_file.is_empty() {
289 return Err(LunarisError::Storage(StorageError::Backend(
290 "fastembed: from_user_defined called with empty tokenizer_file bytes".to_string(),
291 )));
292 }
293 if opts.dim == 0 {
294 return Err(LunarisError::Storage(StorageError::Backend(
295 "fastembed: from_user_defined called with dim = 0".to_string(),
296 )));
297 }
298
299 let execution = opts.execution.clone();
300 let dim = opts.dim;
301 let max_length = opts.max_length;
302
303 tracing::info!(
304 backend = "fastembed",
305 model = "user-defined",
306 dim,
307 execution = ?execution,
308 "fastembed user-defined embedder constructing"
309 );
310
311 // The struct is non-`Clone` once we move bytes in. Construct once;
312 // fallback retry below requires a second model — for the user-defined
313 // path we keep buffers around in `Option<...>` so the fallback path
314 // can reuse them without double-copying multi-MB onnx blobs.
315 let user_model = UserDefinedEmbeddingModel {
316 onnx_file: opts.onnx_file,
317 external_initializers: Vec::new(),
318 tokenizer_files: TokenizerFiles {
319 tokenizer_file: opts.tokenizer_file,
320 config_file: opts.config_file.unwrap_or_default(),
321 special_tokens_map_file: opts.special_tokens_map_file.unwrap_or_default(),
322 tokenizer_config_file: opts.tokenizer_config_file.unwrap_or_default(),
323 },
324 pooling: Some(opts.pooling.into()),
325 quantization: QuantizationMode::None,
326 output_key: None,
327 };
328
329 let model = try_user_defined_with_fallback(&execution, user_model, max_length)?;
330
331 // Best-effort label: fastembed's `Session` doesn't expose the active
332 // EP, so we report the requested preference here. The fallback path
333 // emits its own `warn` if it kicked in, which is the durable signal
334 // for "you asked for accelerator but got CPU".
335 let resolved = execution.clone();
336 tracing::info!(
337 backend = "fastembed",
338 model = "user-defined",
339 dim,
340 execution = ?resolved,
341 "fastembed user-defined embedder initialized"
342 );
343
344 Ok(Self {
345 inner: Arc::new(Inner { model: Mutex::new(model), cache_dir: PathBuf::new(), dim }),
346 })
347 }
348}
349
350/// Options for [`FastembedEmbedder::from_user_defined`]. All byte buffers are
351/// moved into the constructor — they aren't retained inside the embedder once
352/// the ONNX session has been built (the session owns its parsed graph).
353///
354/// **Storage-side dim invariant:** see the constructor's rustdoc — `dim` must
355/// match the ONNX model's output AND should match Lunaris's storage schema
356/// (default 768) unless storage is reindexed.
357#[derive(Clone, Debug)]
358pub struct FastembedUserDefinedOpts {
359 /// Raw bytes of the ONNX graph (e.g., `model.onnx`).
360 pub onnx_file: Vec<u8>,
361 /// Raw bytes of the HF-format `tokenizer.json`.
362 pub tokenizer_file: Vec<u8>,
363 /// Optional `tokenizer_config.json` bytes. Empty if `None`.
364 pub tokenizer_config_file: Option<Vec<u8>>,
365 /// Optional `special_tokens_map.json` bytes.
366 pub special_tokens_map_file: Option<Vec<u8>>,
367 /// Optional model `config.json` bytes (architecture metadata).
368 pub config_file: Option<Vec<u8>>,
369 /// Output dimensionality declared by the operator. MUST match what the
370 /// ONNX graph actually emits; a mismatch surfaces as a vector-index
371 /// rejection at the first ingest call.
372 pub dim: usize,
373 /// Pooling strategy applied to token-level embeddings to produce the
374 /// sentence vector. Mirrors fastembed's [`Pooling`] enum.
375 pub pooling: PoolingMode,
376 /// ORT execution provider preference (same enum as the default path).
377 pub execution: ExecutionPreference,
378 /// Token context window. Defaults to 2048 to match `EmbeddingGemma300M`.
379 pub max_length: usize,
380}
381
382impl Default for FastembedUserDefinedOpts {
383 fn default() -> Self {
384 Self {
385 onnx_file: Vec::new(),
386 tokenizer_file: Vec::new(),
387 tokenizer_config_file: None,
388 special_tokens_map_file: None,
389 config_file: None,
390 dim: 0,
391 pooling: PoolingMode::Mean,
392 execution: execution_from_env(),
393 max_length: FASTEMBED_GEMMA_MAX_TOKENS,
394 }
395 }
396}
397
/// Pooling strategy exposed by Lunaris, so callers need not depend on
/// [`fastembed::Pooling`] directly.
///
/// `Mean` is the default and the recommended choice for sentence-similarity
/// models (EmbeddingGemma and most BGE variants); `Cls` is BERT-style
/// first-token pooling.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum PoolingMode {
    /// First-token (CLS) pooling; converts to [`fastembed::Pooling::Cls`].
    Cls,
    /// Attention-mask-weighted mean pooling; converts to
    /// [`fastembed::Pooling::Mean`]. This is the default variant.
    #[default]
    Mean,
}
413
414impl From<PoolingMode> for Pooling {
415 fn from(m: PoolingMode) -> Self {
416 match m {
417 PoolingMode::Cls => Pooling::Cls,
418 PoolingMode::Mean => Pooling::Mean,
419 }
420 }
421}
422
423/// Try the construction closure with execution providers; on failure when an
424/// accelerator was requested, retry once with CPU only and a `tracing::warn`.
425fn try_with_fallback<F>(
426 pref: &ExecutionPreference,
427 mut build: F,
428) -> Result<TextEmbedding, LunarisError>
429where
430 F: FnMut(bool) -> Result<TextEmbedding, anyhow::Error>,
431{
432 let want_accelerator = requests_accelerator(pref);
433 match build(want_accelerator) {
434 Ok(m) => Ok(m),
435 Err(e) if want_accelerator => {
436 // T-20-01-03 mitigation: %e (Display) — don't dump full provider
437 // debug context (which may include driver paths) into logs.
438 tracing::warn!(
439 error = %e,
440 requested = ?pref,
441 "fastembed execution provider init failed, falling back to CPU"
442 );
443 build(false).map_err(anyhow_to_lunaris)
444 }
445 Err(e) => Err(anyhow_to_lunaris(e)),
446 }
447}
448
449/// User-defined variant of [`try_with_fallback`]. Owns the
450/// `UserDefinedEmbeddingModel` so the fallback retry doesn't have to clone
451/// multi-MB byte buffers — fastembed's struct is `Clone`, so we keep the
452/// owned copy in scope and pass clones in.
453fn try_user_defined_with_fallback(
454 pref: &ExecutionPreference,
455 user_model: UserDefinedEmbeddingModel,
456 max_length: usize,
457) -> Result<TextEmbedding, LunarisError> {
458 let want_accelerator = requests_accelerator(pref);
459 let build = |providers_enabled: bool, m: UserDefinedEmbeddingModel| {
460 let mut init = InitOptionsUserDefined::new().with_max_length(max_length);
461 if providers_enabled {
462 init = init.with_execution_providers(build_execution_providers(pref));
463 }
464 TextEmbedding::try_new_from_user_defined(m, init)
465 };
466
467 if want_accelerator {
468 // Keep an unconsumed clone to retry on the CPU path if the accelerator
469 // session-build fails.
470 let retry_model = user_model.clone();
471 match build(true, user_model) {
472 Ok(m) => Ok(m),
473 Err(e) => {
474 tracing::warn!(
475 error = %e,
476 requested = ?pref,
477 "fastembed (user-defined) execution provider init failed, falling back to CPU"
478 );
479 build(false, retry_model).map_err(anyhow_to_lunaris)
480 }
481 }
482 } else {
483 build(false, user_model).map_err(anyhow_to_lunaris)
484 }
485}
486
487#[async_trait]
488impl Embedder for FastembedEmbedder {
489 fn dim(&self) -> usize {
490 // Phase 20 Plan 20-01 Task 3 — read runtime dim from Inner. For the
491 // default `new()` path this is `FASTEMBED_GEMMA_DIM` (768); for the
492 // `from_user_defined` path it is operator-declared.
493 self.inner.dim
494 }
495
496 async fn embed_batch(&self, inputs: &[&str]) -> Result<Vec<Vec<f32>>, LunarisError> {
497 if inputs.is_empty() {
498 return Ok(Vec::new());
499 }
500
501 // Move owned inputs across the spawn_blocking boundary — `&str`
502 // borrows are not `'static` so we have to materialise `String`s.
503 let owned: Vec<String> = inputs.iter().map(|s| (*s).to_string()).collect();
504 let inner = self.inner.clone();
505 let expected_dim = inner.dim;
506
507 tokio::task::spawn_blocking(move || -> Result<Vec<Vec<f32>>, LunarisError> {
508 // Acquire the Mutex INSIDE the blocking closure. CLAUDE.md lock
509 // discipline: never across `.await`. `parking_lot::Mutex` is
510 // poison-free so the unwrap-like `lock()` cannot fail.
511 let raw: Vec<Vec<f32>> = {
512 let mut guard = inner.model.lock();
513 // `None` -> use fastembed's default batch size (256).
514 guard.embed(owned, None).map_err(anyhow_to_lunaris)?
515 }; // guard drops here; subsequent normalisation is lock-free.
516
517 let mut out: Vec<Vec<f32>> = Vec::with_capacity(raw.len());
518 for row in raw.into_iter() {
519 if row.len() != expected_dim {
520 return Err(LunarisError::Storage(StorageError::Backend(format!(
521 "fastembed: dim mismatch — model returned {} dims, expected {expected_dim}",
522 row.len()
523 ))));
524 }
525 out.push(l2_normalize_row(row, expected_dim));
526 }
527 Ok(out)
528 })
529 .await
530 .map_err(|e| LunarisError::Storage(StorageError::Backend(format!("fastembed join: {e}"))))?
531 }
532}
533
/// Scale `row` to unit Euclidean length and hand it back.
///
/// The norm is accumulated in `f64`. A row whose norm is not strictly greater
/// than `f64::EPSILON` — e.g. all zeros, or a norm that evaluates to NaN — is
/// returned untouched, which mirrors [`crate::candle_gemma`] and avoids a
/// division by zero.
///
/// `expected_dim` feeds a `debug_assert` on the normalised path only; the
/// arithmetic itself works for any width (user-defined models may have
/// `dim != 768` since Phase 20 Plan 20-01).
#[inline]
fn l2_normalize_row(mut row: Vec<f32>, expected_dim: usize) -> Vec<f32> {
    let norm = row.iter().map(|&x| f64::from(x).powi(2)).sum::<f64>().sqrt();
    if norm > f64::EPSILON {
        row.iter_mut().for_each(|v| *v = (f64::from(*v) / norm) as f32);
        debug_assert_eq!(row.len(), expected_dim);
    }
    row
}
555
556/// Bridge `anyhow::Error` (fastembed's error surface) to `LunarisError`.
557/// Mirrors the candle path's `candle_err` helper.
558#[inline]
559fn anyhow_to_lunaris(e: anyhow::Error) -> LunarisError {
560 LunarisError::Storage(StorageError::Backend(format!("fastembed: {e}")))
561}
562
#[cfg(test)]
mod tests {
    use super::*;

    /// Euclidean norm accumulated in `f64`, same formula as production code.
    fn norm_of(row: &[f32]) -> f64 {
        row.iter().map(|x| (*x as f64).powi(2)).sum::<f64>().sqrt()
    }

    /// Run `from_user_defined` on `opts`, require that it fails, and return
    /// the rendered error. `why` becomes the panic message if it succeeds.
    fn user_defined_failure(opts: FastembedUserDefinedOpts, why: &str) -> String {
        FastembedEmbedder::from_user_defined(opts).expect_err(why).to_string()
    }

    #[test]
    fn opts_default_resolves_to_cache_subdir() {
        // The env var may be set by the shell or a sibling test. Removing it
        // would need `unsafe` (`std::env::remove_var` in edition 2024), so
        // the test simply bows out when an override is present — returning
        // the override is the documented contract in that case.
        if std::env::var(FASTEMBED_CACHE_DIR_ENV).is_ok() {
            return;
        }
        let path = FastembedEmbedderOpts::default().cache_dir.expect("default sets a cache_dir");
        let s = path.to_string_lossy().to_string();
        assert!(
            ["lunaris", "models", "fastembed"].iter().all(|part| s.contains(part)),
            "default cache_dir should include the v0 cache layout, got: {s}"
        );
    }

    #[test]
    fn dim_constant_is_768() {
        assert_eq!(FASTEMBED_GEMMA_DIM, 768);
    }

    #[test]
    fn l2_normalize_unit_vector() {
        // A 3-4-5 vector padded with zeros: norm 5 before, ~1 after.
        let row: Vec<f32> =
            [3.0_f32, 4.0].into_iter().chain(std::iter::repeat(0.0)).take(FASTEMBED_GEMMA_DIM).collect();
        let out = l2_normalize_row(row, FASTEMBED_GEMMA_DIM);
        let l2 = norm_of(&out);
        assert!((l2 - 1.0).abs() < 1e-6, "expected unit norm, got {l2}");
        // 3/5 and 4/5, up to f32 rounding.
        assert!((out[0] - 0.6).abs() < 1e-6);
        assert!((out[1] - 0.8).abs() < 1e-6);
    }

    #[test]
    fn l2_normalize_degenerate_row_returned_as_is() {
        // Zero norm is below the EPSILON threshold, so the row must come
        // back untouched (same as candle_gemma).
        let out = l2_normalize_row(vec![0.0_f32; FASTEMBED_GEMMA_DIM], FASTEMBED_GEMMA_DIM);
        assert_eq!(out.len(), FASTEMBED_GEMMA_DIM);
        assert!(out.iter().all(|&x| x == 0.0));
    }

    // ---- Phase 20 Plan 20-01 ------------------------------------------------
    // `ExecutionPreference` / `parse_execution` are tested next to their
    // implementation in `crate::fastembed_exec`. What follows covers the
    // embedder-construction surface: `from_user_defined` error paths and the
    // `PoolingMode` mapping.

    #[test]
    fn from_user_defined_empty_onnx_returns_actionable_error() {
        // Rejected by the front-door checks before fastembed/ORT is touched,
        // so this runs offline. The message must be greppable for
        // "fastembed".
        let msg = user_defined_failure(
            FastembedUserDefinedOpts {
                onnx_file: Vec::new(),
                tokenizer_file: vec![0u8; 4],
                dim: 768,
                ..Default::default()
            },
            "empty onnx",
        );
        assert!(
            msg.contains("fastembed") && msg.contains("onnx_file"),
            "unexpected error message: {msg}"
        );
    }

    #[test]
    fn from_user_defined_empty_tokenizer_returns_actionable_error() {
        let msg = user_defined_failure(
            FastembedUserDefinedOpts {
                onnx_file: vec![0u8; 4],
                tokenizer_file: Vec::new(),
                dim: 768,
                ..Default::default()
            },
            "empty tokenizer",
        );
        assert!(
            msg.contains("fastembed") && msg.contains("tokenizer_file"),
            "unexpected error message: {msg}"
        );
    }

    #[test]
    fn from_user_defined_zero_dim_returns_actionable_error() {
        let msg = user_defined_failure(
            FastembedUserDefinedOpts {
                onnx_file: vec![0u8; 4],
                tokenizer_file: vec![0u8; 4],
                dim: 0,
                ..Default::default()
            },
            "zero dim",
        );
        assert!(msg.contains("fastembed") && msg.contains("dim"), "unexpected: {msg}");
    }

    #[test]
    fn from_user_defined_bad_onnx_bytes_surfaces_fastembed_error() {
        // Garbage that is non-empty gets past our own validation and is
        // expected to be turned down by fastembed/ORT. The resulting error
        // must still carry the "fastembed" prefix added by the
        // `StorageError::Backend` mapping.
        let msg = user_defined_failure(
            FastembedUserDefinedOpts {
                onnx_file: b"not-a-real-onnx-graph".to_vec(),
                tokenizer_file: b"not-a-real-tokenizer".to_vec(),
                dim: 768,
                ..Default::default()
            },
            "bad bytes",
        );
        assert!(msg.contains("fastembed"), "expected fastembed-prefixed error, got: {msg}");
    }

    #[test]
    fn pooling_mode_maps_to_fastembed_pooling() {
        assert!(matches!(Pooling::from(PoolingMode::Cls), Pooling::Cls));
        assert!(matches!(Pooling::from(PoolingMode::Mean), Pooling::Mean));
    }
}
697
698// -----------------------------------------------------------------------------
699// `embedder-it`-gated real-model smoke. Auto-downloads ~600 MB of ONNX weights
700// on first run (30-90s cold; subsequent runs hit the cache in `~/.cache/
701// lunaris/models/fastembed/embeddinggemma-300m-onnx/`). Verify by deleting
702// that subdir and re-running — fastembed re-downloads transparently. Not
703// included in the default test run; CI's existing `embedder-it` job picks
704// this up automatically and Plan 19-02 expands the matrix.
705// -----------------------------------------------------------------------------
#[cfg(all(test, feature = "embedder-it"))]
mod live_tests {
    use super::*;

    /// End-to-end smoke test against the real model: construct with default
    /// options, embed two strings, and check count, width and unit norm.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn fastembed_loads_real_model_and_embeds_one_batch() {
        let embedder = FastembedEmbedder::new(FastembedEmbedderOpts::default())
            .expect("real model load — auto-download to ~/.cache/lunaris/models/fastembed/");
        assert_eq!(embedder.dim(), FASTEMBED_GEMMA_DIM);

        let batch = ["hello world", "lunaris memory engine"];
        let vecs = embedder.embed_batch(&batch).await.expect("embed_batch");
        assert_eq!(vecs.len(), 2);

        for row in vecs.iter() {
            assert_eq!(row.len(), FASTEMBED_GEMMA_DIM);
            let l2 = row.iter().map(|x| (*x as f64).powi(2)).sum::<f64>().sqrt();
            assert!((l2 - 1.0).abs() < 1e-3, "L2 norm = {l2}, expected ~ 1.0");
        }
    }
}