lunaris_embed/lib.rs
1//! lunaris-embed — real `Embedder` impls for Phase 2 hot path (INGEST-02).
2//!
3//! - **Default backend** (`feature = "candle"`): `CandleEmbeddingGemma` — loads
4//! EmbeddingGemma 300M tokenizer + token-embedding matrix from a local cache,
5//! mean-pools the embedded token vectors per input, and L2-normalises to a 768-d
6//! unit vector. Returns an actionable `LunarisError::Storage` error
7//! when the weights cache is missing.
8//! - **Alt backend** (`feature = "ollama"`): `OllamaEmbedder` — POSTs each batch
9//! to `<endpoint>/api/embed` (Ollama's `embed` HTTP API), validates the response
10//! shape against [`Embedder::dim`], and returns 768-d rows. 10s HTTP timeout
11//! (CLAUDE.md: "design for failure — timeouts").
12//!
13//! Phase 1's [`lunaris_core::StubEmbedder`] remains the deterministic test impl —
14//! ingest tests inject it via the `Lunaris::with_embedder` escape hatch so they
15//! don't pay model-load latency. Production callers get `CandleEmbeddingGemma`
16//! by default through `Lunaris::open(url)` (Plan 02-01 Task 3).
17//!
18//! ## Latency budget swap escape hatch
19//!
20//! Per `02-01-PLAN.md` critical constraints: if candle local inference exceeds
21//! the per-batch budget on the dev box (8ms p50 / 20ms p99 per blueprint §4.1),
22//! callers swap to `OllamaEmbedder` via `Lunaris::with_embedder(Arc::new(...))`.
23//! The trait shape does NOT change either way — that's the whole point of the
24//! Phase 1 [`Embedder`] interface lock.
25#![deny(rust_2018_idioms, unreachable_pub)]
26#![forbid(unsafe_code)]
27
28#[cfg(feature = "candle")]
29pub mod candle_gemma;
30// RFC 0007 §3 — FallbackEmbedder<P, F> static-dispatch combinator with
31// per-instance CircuitBreaker. Always built (no feature gate); mirrors `lunaris_extract::fallback`.
32pub mod fallback;
33#[cfg(feature = "fastembed")]
34pub mod fastembed;
35#[cfg(feature = "fastembed")]
36pub mod fastembed_exec;
37#[cfg(feature = "ollama")]
38pub mod ollama;
39
40#[cfg(feature = "candle")]
41pub use candle_gemma::{CandleEmbeddingGemma, CandleEmbeddingGemmaOpts};
42#[cfg(feature = "fastembed")]
43pub use fastembed::{
44 FASTEMBED_EXECUTION_ENV, FastembedEmbedder, FastembedEmbedderOpts, FastembedUserDefinedOpts,
45 PoolingMode,
46};
47#[cfg(feature = "fastembed")]
48pub use fastembed_exec::{ExecutionPreference, execution_from_env, parse_execution};
49#[cfg(feature = "ollama")]
50pub use ollama::{OllamaEmbedder, OllamaEmbedderOpts};
51
52// Re-export the trait from core so callers can `use lunaris_embed::Embedder` in
53// one import alongside the concrete backends.
54pub use lunaris_core::Embedder;