// fathomdb_engine/embedder/mod.rs

//! Read-time query embedder trait and identity types.
//!
//! Phase 12.5a defines the always-on scaffolding that Phase 12.5b (the
//! Candle + bge-small-en-v1.5 default implementation) plugs into behind the
//! `default-embedder` feature flag. The trait lives in `fathomdb-engine`
//! rather than `fathomdb-query` so that `fathomdb-query` stays a pure
//! AST-to-plan compiler with no dyn trait objects or runtime state.
//!
//! The coordinator owns an `Option<Arc<dyn QueryEmbedder>>`. When present,
//! `ExecutionCoordinator::fill_vector_branch` invokes `embed_query` on the
//! raw natural-language query, serializes the returned `Vec<f32>` via
//! `serde_json` into the JSON float-array literal that
//! `CompiledVectorSearch::query_text` already expects, and drops a fully
//! constructed `CompiledVectorSearch` into `plan.vector`. When absent, the
//! plan's vector slot stays `None` and the Phase 12 v1 dormancy invariant
//! on `search()` is preserved unchanged.

use thiserror::Error;

#[cfg(feature = "default-embedder")]
pub mod builtin;

#[cfg(feature = "default-embedder")]
pub use builtin::BuiltinBgeSmallEmbedder;

26/// A read-time query embedder.
27///
28/// Implementations must be `Send + Sync` so the coordinator can share a
29/// single `Arc<dyn QueryEmbedder>` across reader threads without cloning
30/// per call. All methods are `&self` — embedders are expected to be
31/// internally immutable or to manage their own interior mutability.
32pub trait QueryEmbedder: Send + Sync + std::fmt::Debug {
33    /// Embed a single query string into a dense vector.
34    ///
35    /// # Errors
36    /// Returns [`EmbedderError::Unavailable`] if the embedder cannot
37    /// produce a vector right now (e.g. the model weights failed to load
38    /// under a feature-flag stub), or [`EmbedderError::Failed`] if the
39    /// embedding pipeline itself errored. The coordinator treats either
40    /// variant as a graceful degradation, NOT a hard query failure.
41    fn embed_query(&self, text: &str) -> Result<Vec<f32>, EmbedderError>;
42
43    /// Model identity / version / dimension / normalization identity.
44    ///
45    /// Must match the write-time contract for the corresponding vec table.
46    /// Phase 12.5a does not yet enforce the match at runtime; Phase 12.5b
47    /// will gate the vector branch on `identity()` equality with the
48    /// active vector profile.
49    fn identity(&self) -> QueryEmbedderIdentity;
50
51    /// Maximum number of tokens this embedder can process in one call.
52    ///
53    /// The write-time chunker uses this to size text chunks: content
54    /// that fits within `max_tokens()` is stored as one chunk; content
55    /// exceeding it is split at token boundaries. BGE-small-en-v1.5
56    /// has a 512-token context window; long-context embedders (Nomic,
57    /// Jina) return 8192.
58    fn max_tokens(&self) -> usize;
59}
60
/// Identity metadata for a [`QueryEmbedder`].
///
/// Two identities compare equal only when every field matches. `Hash` is
/// derived alongside `Eq` so an identity can also be used as a key in a
/// `HashMap` or `HashSet`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct QueryEmbedderIdentity {
    /// Stable model identifier (e.g. `"bge-small-en-v1.5"`).
    pub model_identity: String,
    /// Model version (e.g. `"1.5"`).
    pub model_version: String,
    /// Output dimension. Must match the active vector profile's dimension
    /// or the vector branch will never fire.
    pub dimension: usize,
    /// Normalization policy identifier (e.g. `"l2"`, `"none"`).
    pub normalization_policy: String,
}

75/// Errors reported by a [`QueryEmbedder`].
76///
77/// Both variants are treated as capability misses by the coordinator:
78/// `plan.was_degraded_at_plan_time` is set and the vector branch is
79/// skipped, but the rest of the search pipeline proceeds normally.
80#[derive(Debug, Error)]
81pub enum EmbedderError {
82    /// The embedder is not available at all (e.g. the default-embedder
83    /// feature flag is disabled, or the model weights failed to load).
84    #[error("embedder unavailable: {0}")]
85    Unavailable(String),
86    /// The embedder is present but failed to embed this particular query.
87    #[error("embedding failed: {0}")]
88    Failed(String),
89}
90
91/// A write-time batch embedder used by `regenerate_vector_embeddings_in_process`.
92///
93/// Unlike [`QueryEmbedder`] (which operates one query at a time for read-time
94/// vector search), `BatchEmbedder` accepts a slice of texts and returns a
95/// vector per input. This is more efficient for write-time regeneration
96/// where all chunk texts can be processed together.
97pub trait BatchEmbedder: Send + Sync {
98    /// Embed a batch of texts. Returns one `Vec<f32>` per input text, in
99    /// the same order.
100    ///
101    /// # Errors
102    /// Returns [`EmbedderError`] if the embedder cannot process the batch.
103    fn batch_embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>, EmbedderError>;
104
105    /// Model identity metadata. Must match the write-time contract for the
106    /// vec table being written.
107    fn identity(&self) -> QueryEmbedderIdentity;
108
109    /// Maximum number of tokens this embedder can process per text chunk.
110    fn max_tokens(&self) -> usize;
111}