Skip to main content

smooth_operator_server/
embedder.rs

//! Embedder selection — the seam that makes production retrieval *real*.
//!
//! Dense retrieval only works if documents and queries are projected by the
//! **same** embedder. There are two implementations of the
//! [`Embedder`](smooth_operator::embedding::Embedder) trait in this workspace:
//!
//! | Embedder                  | Dim  | When                                                       |
//! | ------------------------- | ---- | ---------------------------------------------------------- |
//! | [`GatewayEmbedder`]       | 1536 | **Production.** `text-embedding-3-small` over the gateway. |
//! | [`DeterministicEmbedder`] | 1024 | Offline / dev / tests. FNV-1a token hash — *not* semantic. |
//!
//! [`build_embedder`] picks between them from configuration: when a non-blank
//! gateway key is present it returns the **real, semantic** [`GatewayEmbedder`]
//! (built from the configured gateway URL and model); otherwise it falls back to
//! the network-free [`DeterministicEmbedder`] and logs a loud [`tracing::warn!`]
//! so an operator can't mistake a hash-stub index for a real one. The fallback
//! keeps the 257-test offline baseline (and local dev) working with zero
//! credentials.
//!
//! The store dimension **must** match the active embedder's
//! [`dim`](smooth_operator::embedding::Embedder::dim) — mixing 1024-d and 1536-d
//! vectors silently breaks retrieval. Both the server `/index` handler and the
//! `dev-support` example build their embedder here, so the choice (and its
//! dimension) is made in exactly one place.
24
25use std::sync::Arc;
26
27use smooth_operator::embedding::{DeterministicEmbedder, Embedder};
28use smooth_operator_adapter_postgres::GatewayEmbedder;
29
/// Inputs the embedder selector needs. A small struct (rather than the whole
/// [`ServerConfig`](crate::config::ServerConfig)) so the `dev-support` example —
/// which has its own config type — can call the same selector.
///
/// `Debug` is implemented by hand (not derived) so that formatting this config
/// with `{:?}` never prints the gateway API key.
#[derive(Clone)]
pub struct EmbedderConfig {
    /// The gateway base URL (e.g. `https://llm.smoo.ai/v1`).
    pub gateway_url: String,
    /// The gateway API key. `Some` ⇒ real [`GatewayEmbedder`]; `None` ⇒ the
    /// deterministic fallback.
    pub gateway_key: Option<String>,
    /// The embedding model id (e.g. `text-embedding-3-small`).
    pub model: String,
}

/// Redacting `Debug`: same field layout a derive would produce, except the key's
/// value is replaced by a placeholder. Whether a key is set (`Some` vs `None`)
/// stays visible because that is what decides which embedder gets selected.
impl std::fmt::Debug for EmbedderConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the `Option` shape but never the secret itself.
        let redacted_key = self.gateway_key.as_ref().map(|_| "<redacted>");
        f.debug_struct("EmbedderConfig")
            .field("gateway_url", &self.gateway_url)
            .field("gateway_key", &redacted_key)
            .field("model", &self.model)
            .finish()
    }
}
43
44impl EmbedderConfig {
45    /// Build from the server config, defaulting the embedding model.
46    #[must_use]
47    pub fn from_server_config(config: &crate::config::ServerConfig) -> Self {
48        Self {
49            gateway_url: config.gateway_url.clone(),
50            gateway_key: config.gateway_key.clone(),
51            model: DEFAULT_EMBEDDING_MODEL.to_string(),
52        }
53    }
54}
55
/// Model id requested from the gateway when embedding text. It is an
/// OpenAI-compatible embedding model producing 1536-d vectors, and is separate
/// from the *chat* model (`SMOOTH_AGENT_MODEL`).
pub const DEFAULT_EMBEDDING_MODEL: &str = "text-embedding-3-small";
59
60/// Select the embedder for the index/retrieval path from configuration.
61///
62/// - **Keyed** (`gateway_key` present): the real [`GatewayEmbedder`] —
63///   `text-embedding-3-small`, **1536-d**, the production semantic path.
64/// - **Unkeyed**: the network-free [`DeterministicEmbedder`] — **1024-d**, a
65///   reproducible FNV-1a token hash that is *not* semantic. Logs a loud
66///   [`tracing::warn!`] so this can't be mistaken for real retrieval.
67///
68/// The returned embedder's [`dim`](Embedder::dim) is the source of truth for the
69/// store's vector width (1536 vs 1024) — callers must create the knowledge store
70/// with `embedder.dim()`, never a hardcoded constant.
71#[must_use]
72pub fn build_embedder(config: &EmbedderConfig) -> Arc<dyn Embedder> {
73    match &config.gateway_key {
74        Some(key) if !key.trim().is_empty() => {
75            tracing::info!(
76                model = %config.model,
77                "using GatewayEmbedder (semantic, 1536-d) for retrieval"
78            );
79            Arc::new(GatewayEmbedder::new(
80                config.gateway_url.clone(),
81                key.clone(),
82                config.model.clone(),
83                smooth_operator_adapter_postgres::OPENAI_SMALL_EMBEDDING_DIM,
84            ))
85        }
86        _ => {
87            tracing::warn!(
88                "using non-semantic DeterministicEmbedder (FNV-1a hash, 1024-d) — \
89                 set SMOOAI_GATEWAY_KEY for real semantic retrieval"
90            );
91            Arc::new(DeterministicEmbedder::new())
92        }
93    }
94}
95
#[cfg(test)]
mod tests {
    use super::*;
    use smooth_operator::embedding::DEFAULT_EMBEDDING_DIM;
    use smooth_operator_adapter_postgres::OPENAI_SMALL_EMBEDDING_DIM;

    /// Fixture: a config pointing at a placeholder gateway URL with the default
    /// embedding model, varying only in the supplied key.
    fn config_with_key(key: Option<&str>) -> EmbedderConfig {
        let gateway_key = key.map(str::to_owned);
        EmbedderConfig {
            gateway_url: String::from("https://example.test/v1"),
            gateway_key,
            model: String::from(DEFAULT_EMBEDDING_MODEL),
        }
    }

    #[test]
    fn keyed_config_selects_gateway_embedder_1536() {
        // With a key present the selector must hand back the gateway embedder.
        // Its reported dimension is the observable signature, and reading it
        // needs no network access. This is the production path.
        let selected_dim = build_embedder(&config_with_key(Some("sk-test"))).dim();
        assert_eq!(
            selected_dim, OPENAI_SMALL_EMBEDDING_DIM,
            "keyed config must select the 1536-d GatewayEmbedder"
        );
    }

    #[test]
    fn unkeyed_config_falls_back_to_deterministic_1024() {
        // Without a key the selector takes the warn! branch and returns the
        // offline deterministic embedder, identified by its default dimension.
        let selected_dim = build_embedder(&config_with_key(None)).dim();
        assert_eq!(
            selected_dim, DEFAULT_EMBEDDING_DIM,
            "unkeyed config must fall back to the 1024-d DeterministicEmbedder"
        );
    }

    #[test]
    fn empty_or_whitespace_key_falls_back_to_deterministic() {
        // An empty or all-whitespace key counts as "no key": the selector must
        // fall back rather than try to authenticate with a blank credential.
        for blank in ["", "   "] {
            assert_eq!(
                build_embedder(&config_with_key(Some(blank))).dim(),
                DEFAULT_EMBEDDING_DIM
            );
        }
    }
}