// fathomdb_engine/embedder/mod.rs
//! Read-time query embedder trait and identity types.
//!
//! Phase 12.5a defines the always-on scaffolding that Phase 12.5b (the
//! Candle + bge-small-en-v1.5 default implementation) plugs into behind the
//! `default-embedder` feature flag. The trait lives in `fathomdb-engine`
//! rather than `fathomdb-query` so that `fathomdb-query` stays a pure
//! AST-to-plan compiler with no dyn trait objects or runtime state.
//!
//! The coordinator owns an `Option<Arc<dyn QueryEmbedder>>`. When present,
//! `ExecutionCoordinator::fill_vector_branch` invokes `embed_query` on the
//! raw natural-language query, serializes the returned `Vec<f32>` via
//! `serde_json` into the JSON float-array literal that
//! `CompiledVectorSearch::query_text` already expects, and drops a fully
//! constructed `CompiledVectorSearch` into `plan.vector`. When absent, the
//! plan's vector slot stays `None` and the Phase 12 v1 dormancy invariant
//! on `search()` is preserved unchanged.

use thiserror::Error;

#[cfg(feature = "default-embedder")]
pub mod builtin;

#[cfg(feature = "default-embedder")]
pub use builtin::BuiltinBgeSmallEmbedder;
25
26/// A read-time query embedder.
27///
28/// Implementations must be `Send + Sync` so the coordinator can share a
29/// single `Arc<dyn QueryEmbedder>` across reader threads without cloning
30/// per call. All methods are `&self` — embedders are expected to be
31/// internally immutable or to manage their own interior mutability.
32pub trait QueryEmbedder: Send + Sync + std::fmt::Debug {
33 /// Embed a single query string into a dense vector.
34 ///
35 /// # Errors
36 /// Returns [`EmbedderError::Unavailable`] if the embedder cannot
37 /// produce a vector right now (e.g. the model weights failed to load
38 /// under a feature-flag stub), or [`EmbedderError::Failed`] if the
39 /// embedding pipeline itself errored. The coordinator treats either
40 /// variant as a graceful degradation, NOT a hard query failure.
41 fn embed_query(&self, text: &str) -> Result<Vec<f32>, EmbedderError>;
42
43 /// Model identity / version / dimension / normalization identity.
44 ///
45 /// Must match the write-time contract for the corresponding vec table.
46 /// Phase 12.5a does not yet enforce the match at runtime; Phase 12.5b
47 /// will gate the vector branch on `identity()` equality with the
48 /// active vector profile.
49 fn identity(&self) -> QueryEmbedderIdentity;
50
51 /// Maximum number of tokens this embedder can process in one call.
52 ///
53 /// The write-time chunker uses this to size text chunks: content
54 /// that fits within `max_tokens()` is stored as one chunk; content
55 /// exceeding it is split at token boundaries. BGE-small-en-v1.5
56 /// has a 512-token context window; long-context embedders (Nomic,
57 /// Jina) return 8192.
58 fn max_tokens(&self) -> usize;
59}
60
/// Identity metadata for a [`QueryEmbedder`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QueryEmbedderIdentity {
    /// Stable model identifier (e.g. `"bge-small-en-v1.5"`).
    pub model_identity: String,
    /// Model version (e.g. `"1.5"`).
    pub model_version: String,
    /// Output dimension. Must match the active vector profile's dimension
    /// or the vector branch will never fire.
    pub dimension: usize,
    /// Normalization policy identifier (e.g. `"l2"`, `"none"`).
    pub normalization_policy: String,
}
74
75/// Errors reported by a [`QueryEmbedder`].
76///
77/// Both variants are treated as capability misses by the coordinator:
78/// `plan.was_degraded_at_plan_time` is set and the vector branch is
79/// skipped, but the rest of the search pipeline proceeds normally.
80#[derive(Debug, Error)]
81pub enum EmbedderError {
82 /// The embedder is not available at all (e.g. the default-embedder
83 /// feature flag is disabled, or the model weights failed to load).
84 #[error("embedder unavailable: {0}")]
85 Unavailable(String),
86 /// The embedder is present but failed to embed this particular query.
87 #[error("embedding failed: {0}")]
88 Failed(String),
89}
90
91/// A write-time batch embedder used by `regenerate_vector_embeddings_in_process`.
92///
93/// Unlike [`QueryEmbedder`] (which operates one query at a time for read-time
94/// vector search), `BatchEmbedder` accepts a slice of texts and returns a
95/// vector per input. This is more efficient for write-time regeneration
96/// where all chunk texts can be processed together.
97pub trait BatchEmbedder: Send + Sync {
98 /// Embed a batch of texts. Returns one `Vec<f32>` per input text, in
99 /// the same order.
100 ///
101 /// # Errors
102 /// Returns [`EmbedderError`] if the embedder cannot process the batch.
103 fn batch_embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>, EmbedderError>;
104
105 /// Model identity metadata. Must match the write-time contract for the
106 /// vec table being written.
107 fn identity(&self) -> QueryEmbedderIdentity;
108
109 /// Maximum number of tokens this embedder can process per text chunk.
110 fn max_tokens(&self) -> usize;
111}