// mnm_core/embedder_identity.rs

//! Deriving the embedding-model identity (name + dim + dtype) used to COMPUTE
//! an embedding from the SAME source that LABELS the resulting vectors.
//!
//! The source of truth is the corpus's active model from
//! `GET /v1/models/active`.
//!
//! ## Why this exists (the cross-element drift bug)
//!
//! RAG correctness requires that query vectors and stored vectors come from the
//! SAME embedding model. Historically every client embedded with a model
//! NAME/dim/dtype taken from LOCAL CONFIG (`[models].embedding` /
//! `code_embedding` / `voyage_output_dimension` / `voyage_output_dtype`) but
//! LABELED the resulting vectors with the corpus wire id fetched separately from
//! `GET /v1/models/active`. The server's only consistency guard compares that
//! label string + vector length — it never checks the vectors were actually
//! produced by the labeled model. Because Voyage's voyage-3 / voyage-code-3 /
//! voyage-context-3 all emit 1024-dim float vectors, pointing a client's config
//! at a different-but-same-dimension model produced vectors that mismatched the
//! corpus yet passed both guards → silently-wrong cosine similarities.
//!
//! The fix: DERIVE the model name + dim + dtype that drive embedder construction
//! from the SAME active-model response that supplies the wire-id label, so the
//! two cannot diverge. Local config survives only as a logged fallback for the
//! offline / active-fetch-unavailable path.

/// The three values an embedder constructor needs:
/// `VoyageEmbedder::new(api_key, name, dim, dtype)` /
/// `ContextualizedVoyageEmbedder::new(api_key, name, dim, dtype)`.
///
/// The API key is intentionally NOT here — a key is auth, not model identity,
/// and is resolved independently (`resolve_voyage_api_key`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbedderIdentity {
    /// Bare model name (e.g. `voyage-context-3`), NOT the `name@revision` wire id.
    pub name: String,
    /// Matryoshka output dimension sent as `output_dimension`.
    pub dim: u32,
    /// Output dtype sent as `output_dtype` (e.g. `"float"`).
    pub dtype: String,
}

/// What the active-model fetch reported for one model (general or code).
///
/// All three fields are present iff the fetch succeeded AND the model exists; a
/// failed/absent fetch is represented by passing `None` to [`derive()`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ActiveModelIdentity {
    /// Bare model name from the active-model response.
    pub name: String,
    /// Dimension from the active-model response.
    pub dim: u32,
    /// Dtype from the active-model response (new field; `"float"` today).
    pub dtype: String,
}

/// Config-supplied fallback identity, used ONLY when the active-model fetch is
/// unavailable (offline behavior preservation). These values are no longer the
/// authority for what the embedder computes.
///
/// The string fields borrow from the caller for lifetime `'a`, so building a
/// fallback does not allocate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FallbackIdentity<'a> {
    /// Config model name (`[models].embedding` or `[models].code_embedding`).
    pub name: &'a str,
    /// Config dimension (`[models].voyage_output_dimension`).
    pub dim: u32,
    /// Config dtype (`[models].voyage_output_dtype`).
    pub dtype: &'a str,
}

69/// Derive the embedder identity, preferring the active-model fetch over config.
70///
71/// When `active` is `Some`, its `{name, dim, dtype}` win outright — this is the
72/// authoritative path that keeps the embed model and the wire-id label in
73/// lockstep. When `active` is `None` (the fetch was unavailable), `fallback`
74/// (local config) is used instead and a `tracing::warn!` is emitted so the
75/// degraded path is visible in logs; this preserves offline behavior but is no
76/// longer trusted for correctness.
77///
78/// `which` is a short label (`"general"` / `"code"`) for the warning line.
79///
80/// Use [`derive_quiet`] when the caller has ALREADY logged the fetch failure
81/// (so a single event doesn't warn once per identity) or when the derived
82/// identity won't actually drive an embed (e.g. fts mode embeds nothing) — both
83/// would otherwise emit a misleading "vectors are NOT guaranteed to match"
84/// warning for a vector that is never produced.
85#[must_use]
86pub fn derive(
87    which: &str,
88    active: Option<&ActiveModelIdentity>,
89    fallback: &FallbackIdentity<'_>,
90) -> EmbedderIdentity {
91    derive_inner(which, active, fallback, true)
92}
93
94/// Like [`derive()`] but suppresses the fallback `tracing::warn!`.
95///
96/// For callers that already logged the active-fetch failure (avoiding a
97/// warn-per-identity for one event) or that won't use the identity for an
98/// actual embed.
99#[must_use]
100pub fn derive_quiet(
101    which: &str,
102    active: Option<&ActiveModelIdentity>,
103    fallback: &FallbackIdentity<'_>,
104) -> EmbedderIdentity {
105    derive_inner(which, active, fallback, false)
106}
107
108fn derive_inner(
109    which: &str,
110    active: Option<&ActiveModelIdentity>,
111    fallback: &FallbackIdentity<'_>,
112    warn_on_fallback: bool,
113) -> EmbedderIdentity {
114    active.map_or_else(
115        || {
116            if warn_on_fallback {
117                tracing::warn!(
118                    which,
119                    fallback_model = fallback.name,
120                    fallback_dim = fallback.dim,
121                    fallback_dtype = fallback.dtype,
122                    "active-model fetch unavailable; embedding with local config as a fallback \
123                     (vectors are NOT guaranteed to match the corpus model)"
124                );
125            }
126            EmbedderIdentity {
127                name: fallback.name.to_owned(),
128                dim: fallback.dim,
129                dtype: fallback.dtype.to_owned(),
130            }
131        },
132        |a| EmbedderIdentity {
133            name: a.name.clone(),
134            dim: a.dim,
135            dtype: a.dtype.clone(),
136        },
137    )
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// Config fallback that deliberately differs from `active()` in every field.
    fn fallback() -> FallbackIdentity<'static> {
        FallbackIdentity {
            name: "config-model",
            dim: 256,
            dtype: "int8",
        }
    }

    /// Active-model response used by the authoritative-path tests.
    fn active() -> ActiveModelIdentity {
        ActiveModelIdentity {
            name: "voyage-context-3".to_owned(),
            dim: 1024,
            dtype: "float".to_owned(),
        }
    }

    /// The authoritative path: an active-model response drives the embedder
    /// identity verbatim, NOT the (divergent) config. This is the property the
    /// cross-element drift bug violated.
    #[test]
    fn derive_prefers_active_over_divergent_config() {
        let active = active();
        let id = derive("general", Some(&active), &fallback());
        // Every field comes from the active response, none from the config fallback.
        assert_eq!(id.name, "voyage-context-3");
        assert_eq!(id.dim, 1024);
        assert_eq!(id.dtype, "float");
        assert_ne!(id.name, "config-model");
        assert_ne!(id.dim, 256);
        assert_ne!(id.dtype, "int8");
    }

    /// The offline path: with no active-model response, config is the fallback.
    #[test]
    fn derive_falls_back_to_config_when_active_absent() {
        let id = derive("code", None, &fallback());
        assert_eq!(id.name, "config-model");
        assert_eq!(id.dim, 256);
        assert_eq!(id.dtype, "int8");
    }

    /// `derive_quiet` only suppresses the warning; with an active response it
    /// must select the same identity as `derive`.
    #[test]
    fn derive_quiet_matches_derive_when_active_present() {
        let active = active();
        let quiet = derive_quiet("general", Some(&active), &fallback());
        let loud = derive("general", Some(&active), &fallback());
        assert_eq!(quiet, loud);
        assert_eq!(quiet.name, "voyage-context-3");
    }

    /// `derive_quiet` on the offline path still returns the config fallback.
    #[test]
    fn derive_quiet_falls_back_to_config_when_active_absent() {
        let id = derive_quiet("code", None, &fallback());
        assert_eq!(id.name, "config-model");
        assert_eq!(id.dim, 256);
        assert_eq!(id.dtype, "int8");
    }
}