mnm_core/embedder_identity.rs
1//! Deriving the embedding-model identity (name + dim + dtype) used to COMPUTE
2//! an embedding from the SAME source that LABELS the resulting vectors.
3//!
4//! The source of truth is the corpus's active model from
5//! `GET /v1/models/active`.
6//!
7//! ## Why this exists (the cross-element drift bug)
8//!
9//! RAG correctness requires that query vectors and stored vectors come from the
10//! SAME embedding model. Historically every client embedded with a model
11//! NAME/dim/dtype taken from LOCAL CONFIG (`[models].embedding` /
12//! `code_embedding` / `voyage_output_dimension` / `voyage_output_dtype`) but
13//! LABELED the resulting vectors with the corpus wire id fetched separately from
14//! `GET /v1/models/active`. The server's only consistency guard compares that
15//! label string + vector length — it never checks the vectors were actually
16//! produced by the labeled model. Because Voyage's voyage-3 / voyage-code-3 /
17//! voyage-context-3 all emit 1024-dim float vectors, pointing a client's config
18//! at a different-but-same-dimension model produced vectors that mismatched the
//! corpus yet passed both halves of that guard (label string and vector
//! length) → silently-wrong cosine similarities.
20//!
21//! The fix: DERIVE the model name + dim + dtype that drive embedder construction
22//! from the SAME active-model response that supplies the wire-id label, so the
23//! two cannot diverge. Local config survives only as a logged fallback for the
24//! offline / active-fetch-unavailable path.
25
/// Model identity handed to an embedder constructor: the `name`, `dim` and
/// `dtype` arguments of `VoyageEmbedder::new(api_key, name, dim, dtype)` and
/// `ContextualizedVoyageEmbedder::new(api_key, name, dim, dtype)`.
///
/// There is deliberately no API-key field. A key is authentication rather than
/// model identity, and it is resolved separately (`resolve_voyage_api_key`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EmbedderIdentity {
    /// The bare model name, for example `voyage-context-3`. This is NOT the
    /// `name@revision` wire id.
    pub name: String,
    /// Matryoshka output dimension; sent as `output_dimension`.
    pub dim: u32,
    /// Output dtype; sent as `output_dtype` (for example `"float"`).
    pub dtype: String,
}
41
/// The identity the active-model fetch reported for a single model (general or
/// code).
///
/// A value exists only when the fetch succeeded AND the model is present, so
/// all three fields are always populated together. A failed or absent fetch is
/// expressed by handing `None` to [`derive()`] rather than by a partially
/// filled value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ActiveModelIdentity {
    /// Bare model name, as given by the active-model response.
    pub name: String,
    /// Dimension, as given by the active-model response.
    pub dim: u32,
    /// Dtype, as given by the active-model response (new field; `"float"`
    /// today).
    pub dtype: String,
}
55
/// Config-supplied fallback identity, used ONLY when the active-model fetch is
/// unavailable (offline behavior preservation). These values are no longer the
/// authority for what the embedder computes.
///
/// The struct is a borrowed view (two `&str`s and a `u32`), so it is `Copy`
/// and can be handed around by value without an explicit `.clone()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FallbackIdentity<'a> {
    /// Config model name (`[models].embedding` or `[models].code_embedding`).
    pub name: &'a str,
    /// Config dimension (`[models].voyage_output_dimension`).
    pub dim: u32,
    /// Config dtype (`[models].voyage_output_dtype`).
    pub dtype: &'a str,
}
68
69/// Derive the embedder identity, preferring the active-model fetch over config.
70///
71/// When `active` is `Some`, its `{name, dim, dtype}` win outright — this is the
72/// authoritative path that keeps the embed model and the wire-id label in
73/// lockstep. When `active` is `None` (the fetch was unavailable), `fallback`
74/// (local config) is used instead and a `tracing::warn!` is emitted so the
75/// degraded path is visible in logs; this preserves offline behavior but is no
76/// longer trusted for correctness.
77///
78/// `which` is a short label (`"general"` / `"code"`) for the warning line.
79///
80/// Use [`derive_quiet`] when the caller has ALREADY logged the fetch failure
81/// (so a single event doesn't warn once per identity) or when the derived
82/// identity won't actually drive an embed (e.g. fts mode embeds nothing) — both
83/// would otherwise emit a misleading "vectors are NOT guaranteed to match"
84/// warning for a vector that is never produced.
85#[must_use]
86pub fn derive(
87 which: &str,
88 active: Option<&ActiveModelIdentity>,
89 fallback: &FallbackIdentity<'_>,
90) -> EmbedderIdentity {
91 derive_inner(which, active, fallback, true)
92}
93
94/// Like [`derive()`] but suppresses the fallback `tracing::warn!`.
95///
96/// For callers that already logged the active-fetch failure (avoiding a
97/// warn-per-identity for one event) or that won't use the identity for an
98/// actual embed.
99#[must_use]
100pub fn derive_quiet(
101 which: &str,
102 active: Option<&ActiveModelIdentity>,
103 fallback: &FallbackIdentity<'_>,
104) -> EmbedderIdentity {
105 derive_inner(which, active, fallback, false)
106}
107
108fn derive_inner(
109 which: &str,
110 active: Option<&ActiveModelIdentity>,
111 fallback: &FallbackIdentity<'_>,
112 warn_on_fallback: bool,
113) -> EmbedderIdentity {
114 active.map_or_else(
115 || {
116 if warn_on_fallback {
117 tracing::warn!(
118 which,
119 fallback_model = fallback.name,
120 fallback_dim = fallback.dim,
121 fallback_dtype = fallback.dtype,
122 "active-model fetch unavailable; embedding with local config as a fallback \
123 (vectors are NOT guaranteed to match the corpus model)"
124 );
125 }
126 EmbedderIdentity {
127 name: fallback.name.to_owned(),
128 dim: fallback.dim,
129 dtype: fallback.dtype.to_owned(),
130 }
131 },
132 |a| EmbedderIdentity {
133 name: a.name.clone(),
134 dim: a.dim,
135 dtype: a.dtype.clone(),
136 },
137 )
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// Config fallback whose every field differs from [`active()`], so a test
    /// can tell which source an identity was taken from.
    fn fallback() -> FallbackIdentity<'static> {
        FallbackIdentity {
            name: "config-model",
            dim: 256,
            dtype: "int8",
        }
    }

    /// Active-model fixture that diverges from [`fallback()`] in all fields.
    fn active() -> ActiveModelIdentity {
        ActiveModelIdentity {
            name: "voyage-context-3".to_owned(),
            dim: 1024,
            dtype: "float".to_owned(),
        }
    }

    /// The identity expected whenever the active-model response wins.
    fn expected_from_active() -> EmbedderIdentity {
        EmbedderIdentity {
            name: "voyage-context-3".to_owned(),
            dim: 1024,
            dtype: "float".to_owned(),
        }
    }

    /// The identity expected whenever the config fallback is used.
    fn expected_from_fallback() -> EmbedderIdentity {
        EmbedderIdentity {
            name: "config-model".to_owned(),
            dim: 256,
            dtype: "int8".to_owned(),
        }
    }

    /// The authoritative path: an active-model response drives the embedder
    /// identity verbatim, NOT the (divergent) config. This is the property the
    /// cross-element drift bug violated.
    #[test]
    fn derive_prefers_active_over_divergent_config() {
        let id = derive("general", Some(&active()), &fallback());
        // Every field comes from the active response, none from the config fallback.
        assert_eq!(id, expected_from_active());
        assert_ne!(id.name, "config-model");
        assert_ne!(id.dim, 256);
        assert_ne!(id.dtype, "int8");
    }

    /// The offline path: with no active-model response, config is the fallback.
    #[test]
    fn derive_falls_back_to_config_when_active_absent() {
        let id = derive("code", None, &fallback());
        assert_eq!(id, expected_from_fallback());
    }

    /// `derive_quiet` applies the same selection rule as `derive` on the
    /// authoritative path.
    #[test]
    fn derive_quiet_prefers_active_over_divergent_config() {
        let id = derive_quiet("general", Some(&active()), &fallback());
        assert_eq!(id, expected_from_active());
        assert_eq!(id, derive("general", Some(&active()), &fallback()));
    }

    /// `derive_quiet` returns the same config-derived identity as `derive` on
    /// the offline path; only the logging differs.
    #[test]
    fn derive_quiet_falls_back_to_config_when_active_absent() {
        let id = derive_quiet("code", None, &fallback());
        assert_eq!(id, expected_from_fallback());
        assert_eq!(id, derive("code", None, &fallback()));
    }
}
180}