smooth_operator_server/embedder.rs
1//! Embedder selection — the seam that makes production retrieval *real*.
2//!
3//! Dense retrieval only works if documents and queries are projected by the
4//! **same** embedder. There are two implementations of the
5//! [`Embedder`](smooth_operator::embedding::Embedder) trait in this workspace:
6//!
7//! | Embedder | Dim | When |
8//! | ----------------------- | ---- | ----------------------------------------------------- |
9//! | [`GatewayEmbedder`] | 1536 | **Production.** `text-embedding-3-small` over the gateway. |
10//! | [`DeterministicEmbedder`] | 1024 | Offline / dev / tests. FNV-1a token hash — *not* semantic. |
11//!
//! [`build_embedder`] picks between them from configuration: when a non-blank
//! gateway key is present — and the build includes the `postgres` feature — it
//! passes that key, the URL and the model to the **real, semantic**
//! [`GatewayEmbedder`]; otherwise it falls back to the network-free
//! [`DeterministicEmbedder`] and logs a loud [`tracing::warn!`] so an operator
//! can't mistake a hash-stub index for a real one. The fallback keeps the
//! 257-test offline baseline (and local dev) working with zero credentials.
18//!
19//! The store dimension **must** match the active embedder's
20//! [`dim`](smooth_operator::embedding::Embedder::dim) — mixing 1024-d and 1536-d
21//! vectors silently breaks retrieval. Both the server `/index` handler and the
22//! `dev-support` example build their embedder here so the choice (and its
23//! dimension) is made in exactly one place.
24
25use std::sync::Arc;
26
27use smooth_operator::embedding::{DeterministicEmbedder, Embedder};
28#[cfg(feature = "postgres")]
29use smooth_operator_adapter_postgres::GatewayEmbedder;
30
/// The dimension of the gateway embedder's `text-embedding-3-small` output.
///
/// Re-exported from the postgres adapter on the default (cloud) build; defined
/// locally on the lean build so the constant — and any caller that names it —
/// still resolves without the postgres crate. The two definitions agree (1536).
#[cfg(feature = "postgres")]
pub use smooth_operator_adapter_postgres::OPENAI_SMALL_EMBEDDING_DIM;
/// Lean-build (no `postgres` feature) definition of the gateway embedder's
/// `text-embedding-3-small` output dimension (1536). Declared locally so the
/// name still resolves without the postgres adapter crate.
#[cfg(not(feature = "postgres"))]
pub const OPENAI_SMALL_EMBEDDING_DIM: usize = 1536;
39
/// Inputs the embedder selector needs. A small struct (rather than the whole
/// [`ServerConfig`](crate::config::ServerConfig)) so the `dev-support` example —
/// which has its own config type — can call the same selector.
///
/// `Debug` is implemented by hand rather than derived so the gateway API key is
/// never written out when the config is formatted with `{:?}` (logs, panic
/// messages, test failures): the key renders only as `Some("<redacted>")` or
/// `None`.
#[derive(Clone)]
pub struct EmbedderConfig {
    /// The gateway base URL (e.g. `https://llm.smoo.ai/v1`).
    pub gateway_url: String,
    /// The gateway API key. `Some` ⇒ real [`GatewayEmbedder`]; `None` ⇒ the
    /// deterministic fallback.
    pub gateway_key: Option<String>,
    /// The embedding model id (e.g. `text-embedding-3-small`).
    pub model: String,
}

impl std::fmt::Debug for EmbedderConfig {
    /// Formats like the derived `Debug` would, except that the value of
    /// `gateway_key` is replaced by a fixed placeholder. Whether a key is
    /// present at all stays visible, which is what matters when debugging
    /// embedder selection.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("EmbedderConfig")
            .field("gateway_url", &self.gateway_url)
            .field(
                "gateway_key",
                &self.gateway_key.as_ref().map(|_| "<redacted>"),
            )
            .field("model", &self.model)
            .finish()
    }
}
53
54impl EmbedderConfig {
55 /// Build from the server config, defaulting the embedding model.
56 #[must_use]
57 pub fn from_server_config(config: &crate::config::ServerConfig) -> Self {
58 Self {
59 gateway_url: config.gateway_url.clone(),
60 gateway_key: config.gateway_key.clone(),
61 model: DEFAULT_EMBEDDING_MODEL.to_string(),
62 }
63 }
64}
65
/// The embedding model the gateway selector requests (OpenAI-compatible,
/// 1536-d). Distinct from the *chat* model (`SMOOTH_AGENT_MODEL`).
///
/// [`EmbedderConfig::from_server_config`] always fills
/// [`EmbedderConfig::model`] with this value.
pub const DEFAULT_EMBEDDING_MODEL: &str = "text-embedding-3-small";
69
70/// Select the embedder for the index/retrieval path from configuration.
71///
72/// - **Keyed** (`gateway_key` present): the real [`GatewayEmbedder`] —
73/// `text-embedding-3-small`, **1536-d**, the production semantic path.
74/// - **Unkeyed**: the network-free [`DeterministicEmbedder`] — **1024-d**, a
75/// reproducible FNV-1a token hash that is *not* semantic. Logs a loud
76/// [`tracing::warn!`] so this can't be mistaken for real retrieval.
77///
78/// The returned embedder's [`dim`](Embedder::dim) is the source of truth for the
79/// store's vector width (1536 vs 1024) — callers must create the knowledge store
80/// with `embedder.dim()`, never a hardcoded constant.
81#[must_use]
82pub fn build_embedder(config: &EmbedderConfig) -> Arc<dyn Embedder> {
83 match &config.gateway_key {
84 // The real, semantic GatewayEmbedder lives in the postgres adapter crate,
85 // so it's only available on a build with the `postgres` feature (the
86 // default / cloud build). On a lean `--no-default-features` build this arm
87 // is compiled out and a present key falls through to the deterministic
88 // fallback below — the lean/embed build has no gateway-backed retrieval.
89 #[cfg(feature = "postgres")]
90 Some(key) if !key.trim().is_empty() => {
91 tracing::info!(
92 model = %config.model,
93 "using GatewayEmbedder (semantic, 1536-d) for retrieval"
94 );
95 Arc::new(GatewayEmbedder::new(
96 config.gateway_url.clone(),
97 key.clone(),
98 config.model.clone(),
99 OPENAI_SMALL_EMBEDDING_DIM,
100 ))
101 }
102 _ => {
103 tracing::warn!(
104 "using non-semantic DeterministicEmbedder (FNV-1a hash, 1024-d) — \
105 set SMOOAI_GATEWAY_KEY for real semantic retrieval"
106 );
107 Arc::new(DeterministicEmbedder::new())
108 }
109 }
110}
111
#[cfg(test)]
mod tests {
    use super::*;
    use smooth_operator::embedding::DEFAULT_EMBEDDING_DIM;

    /// Fixture: an [`EmbedderConfig`] aimed at a dummy gateway URL with the
    /// default embedding model and the given (optional) key.
    fn config_with_key(gateway_key: Option<&str>) -> EmbedderConfig {
        EmbedderConfig {
            gateway_url: String::from("https://example.test/v1"),
            gateway_key: gateway_key.map(String::from),
            model: String::from(DEFAULT_EMBEDDING_MODEL),
        }
    }

    #[cfg(feature = "postgres")]
    #[test]
    fn keyed_config_selects_gateway_embedder_1536() {
        // With a key present the selector must hand back the GatewayEmbedder.
        // Its 1536-d width identifies it, and `dim()` needs no network. Only
        // meaningful on a build with the `postgres` feature, where the gateway
        // embedder is compiled in.
        let config = config_with_key(Some("sk-test"));
        let selected = build_embedder(&config);
        assert_eq!(
            selected.dim(),
            OPENAI_SMALL_EMBEDDING_DIM,
            "keyed config must select the 1536-d GatewayEmbedder"
        );
    }

    #[cfg(not(feature = "postgres"))]
    #[test]
    fn keyed_config_falls_back_to_deterministic_on_lean_build() {
        // A lean `--no-default-features` build has no GatewayEmbedder, so a
        // configured key still yields the deterministic (1024-d) embedder.
        let config = config_with_key(Some("sk-test"));
        let selected = build_embedder(&config);
        assert_eq!(
            selected.dim(),
            DEFAULT_EMBEDDING_DIM,
            "lean build (no postgres feature) has no GatewayEmbedder; key is ignored"
        );
    }

    #[test]
    fn unkeyed_config_falls_back_to_deterministic_1024() {
        // No key at all: the offline, 1024-d fallback (the warn! path).
        let config = config_with_key(None);
        let selected = build_embedder(&config);
        assert_eq!(
            selected.dim(),
            DEFAULT_EMBEDDING_DIM,
            "unkeyed config must fall back to the 1024-d DeterministicEmbedder"
        );
    }

    #[test]
    fn empty_or_whitespace_key_falls_back_to_deterministic() {
        // Blank and whitespace-only keys count as "no key": the selector must
        // fall back rather than try to authenticate with an empty credential.
        for blank in &["", " "] {
            let selected = build_embedder(&config_with_key(Some(*blank)));
            assert_eq!(selected.dim(), DEFAULT_EMBEDDING_DIM);
        }
    }
}
175}