smooth_operator_server/reranker.rs
1//! Reranker selection — the opt-in post-retrieval reorder stage (feature gap G8).
2//!
3//! Hybrid retrieval (dense ∪ sparse → RRF) gives a good rank-ordered top-K, but
4//! the fusion score is a *rank* signal, not a sharp relevance score against the
5//! query. A reranker reorders that candidate set with a cross-encoder before it
//! reaches the model. Unlike the embedder — which is *required* for dense
//! retrieval to work at all — the reranker is **opt-in**: by default the selector
//! returns no reranker at all (`None`, which the retrieval path treats like the
//! identity [`NoopReranker`]), so wiring the selector in never changes existing
//! behavior.
10//!
11//! There are three implementations of the
12//! [`Reranker`](smooth_operator::rerank::Reranker) trait in this workspace:
13//!
14//! | Reranker | Network | When |
15//! | ------------------- | ------- | ---------------------------------------------------------- |
16//! | [`GatewayReranker`] | yes | **Production.** Cohere/Voyage `/v1/rerank` over the gateway. |
17//! | [`LexicalReranker`] | no | Offline deterministic reorder (BM25-ish lexical overlap). |
18//! | [`NoopReranker`] | no | **Default.** Identity — rerank is off, order unchanged. |
19//!
//! [`build_reranker`] picks from configuration, mirroring
//! [`build_embedder`](crate::embedder::build_embedder). Selection is driven by
//! `SMOOTH_AGENT_RERANK` (parsed into [`RerankMode`]); a gateway key alone never
//! turns rerank on:
//!
//! - **`gateway` + gateway key present** ⇒ the real [`GatewayReranker`], the
//!   production semantic reorder. Logs a [`tracing::info!`].
//! - **`lexical`, or `gateway` without a key** ⇒ the network-free
//!   [`LexicalReranker`] for an offline reorder. The keyless `gateway` fallback
//!   logs a [`tracing::warn!`].
//! - **Off / unset (default)** ⇒ no reranker (`None`) — rerank is off, so the
//!   271-test baseline (and default behavior) is byte-for-byte unchanged. Logs a
//!   [`tracing::info!`] so an operator can see rerank is disabled.
30
31use std::sync::Arc;
32
33use smooth_operator::rerank::{LexicalReranker, NoopReranker, Reranker};
34use smooth_operator_adapter_postgres::{GatewayReranker, DEFAULT_RERANK_MODEL};
35
36/// Inputs the reranker selector needs. A small struct (rather than the whole
37/// [`ServerConfig`](crate::config::ServerConfig)) so other callers can build the
38/// same selector. Mirrors [`EmbedderConfig`](crate::embedder::EmbedderConfig).
39#[derive(Debug, Clone)]
40pub struct RerankerConfig {
41 /// The gateway base URL (e.g. `https://llm.smoo.ai/v1`).
42 pub gateway_url: String,
43 /// The gateway API key. `Some` ⇒ the real [`GatewayReranker`] is eligible.
44 pub gateway_key: Option<String>,
45 /// The rerank model id (e.g. `rerank-english-v3.0`).
46 pub model: String,
47 /// Whether the rerank stage is enabled at all. When `false` (the default), the
48 /// selector returns the identity [`NoopReranker`] regardless of the key, so
49 /// default behavior is unchanged. Driven by `SMOOTH_AGENT_RERANK`.
50 pub mode: RerankMode,
51}
52
/// Which rerank stage the operator wants. `Off` is the default so the rerank
/// stage stays opt-in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RerankMode {
    /// Rerank disabled — the selector yields no reranker (default).
    #[default]
    Off,
    /// Gateway cross-encoder if keyed, else fall back to the offline
    /// [`LexicalReranker`].
    Gateway,
    /// Force the offline deterministic [`LexicalReranker`] (no network).
    Lexical,
}

impl RerankMode {
    /// Parse the `SMOOTH_AGENT_RERANK` env value.
    ///
    /// Matching ignores surrounding whitespace and ASCII case:
    ///
    /// - `gateway`, `on`, `1`, `true` ⇒ [`Gateway`](Self::Gateway)
    /// - `lexical` ⇒ [`Lexical`](Self::Lexical)
    /// - anything else (including empty) ⇒ [`Off`](Self::Off)
    #[must_use]
    pub fn parse(s: &str) -> Self {
        // Spellings that all mean "turn the gateway rerank stage on".
        const GATEWAY_ALIASES: [&str; 4] = ["gateway", "on", "1", "true"];

        // Compare case-insensitively in place instead of allocating a lowercased copy.
        let value = s.trim();
        if GATEWAY_ALIASES
            .iter()
            .any(|&alias| value.eq_ignore_ascii_case(alias))
        {
            Self::Gateway
        } else if value.eq_ignore_ascii_case("lexical") {
            Self::Lexical
        } else {
            Self::Off
        }
    }
}
77
78impl RerankerConfig {
79 /// Read the rerank mode from `SMOOTH_AGENT_RERANK` (unset ⇒ [`Off`](RerankMode::Off)).
80 #[must_use]
81 pub fn mode_from_env() -> RerankMode {
82 std::env::var("SMOOTH_AGENT_RERANK")
83 .ok()
84 .map(|s| RerankMode::parse(&s))
85 .unwrap_or_default()
86 }
87
88 /// Build from the gateway parts + `SMOOTH_AGENT_RERANK`, defaulting the rerank
89 /// model. The shared constructor so both the reference server's `ServerConfig`
90 /// and the lambda's `LambdaConfig` select rerank identically.
91 #[must_use]
92 pub fn from_gateway(gateway_url: impl Into<String>, gateway_key: Option<String>) -> Self {
93 Self {
94 gateway_url: gateway_url.into(),
95 gateway_key,
96 model: DEFAULT_RERANK_MODEL.to_string(),
97 mode: Self::mode_from_env(),
98 }
99 }
100
101 /// Build from the server config + `SMOOTH_AGENT_RERANK`, defaulting the rerank
102 /// model.
103 #[must_use]
104 pub fn from_server_config(config: &crate::config::ServerConfig) -> Self {
105 Self::from_gateway(config.gateway_url.clone(), config.gateway_key.clone())
106 }
107}
108
109/// Select the reranker for the retrieval path from configuration.
110///
111/// Returns `None` when rerank is disabled (the default), which the retrieval path
112/// treats as "don't reorder" — keeping default behavior byte-for-byte unchanged.
113/// Returns `Some(reranker)` only when explicitly enabled via `SMOOTH_AGENT_RERANK`:
114///
115/// - `gateway` + a gateway key ⇒ the real [`GatewayReranker`] (production).
116/// - `gateway` without a key, or `lexical` ⇒ the offline [`LexicalReranker`].
117/// - `off` / unset ⇒ `None` (no rerank).
118#[must_use]
119pub fn build_reranker(config: &RerankerConfig) -> Option<Arc<dyn Reranker>> {
120 match config.mode {
121 RerankMode::Off => {
122 tracing::info!("rerank stage disabled (default) — retrieval order unchanged");
123 None
124 }
125 RerankMode::Gateway => match &config.gateway_key {
126 Some(key) if !key.trim().is_empty() => {
127 tracing::info!(
128 model = %config.model,
129 "using GatewayReranker (cross-encoder /v1/rerank) for retrieval reorder"
130 );
131 Some(Arc::new(GatewayReranker::new(
132 config.gateway_url.clone(),
133 key.clone(),
134 config.model.clone(),
135 )))
136 }
137 _ => {
138 tracing::warn!(
139 "SMOOTH_AGENT_RERANK=gateway but no gateway key — \
140 falling back to the offline LexicalReranker"
141 );
142 Some(Arc::new(LexicalReranker::new()))
143 }
144 },
145 RerankMode::Lexical => {
146 tracing::info!(
147 "using offline LexicalReranker (BM25-ish, no network) for retrieval reorder"
148 );
149 Some(Arc::new(LexicalReranker::new()))
150 }
151 }
152}
153
154/// The identity reranker. Exposed so callers that want an explicit no-op (rather
155/// than `None`) can construct one without importing the core crate directly.
156#[must_use]
157pub fn noop_reranker() -> Arc<dyn Reranker> {
158 Arc::new(NoopReranker)
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a selector config with a fixed test gateway URL and the default
    /// rerank model, varying only the mode and the optional key.
    fn cfg(mode: RerankMode, key: Option<&str>) -> RerankerConfig {
        RerankerConfig {
            gateway_url: "https://example.test/v1".into(),
            gateway_key: key.map(str::to_string),
            model: DEFAULT_RERANK_MODEL.to_string(),
            mode,
        }
    }

    #[test]
    fn default_mode_is_off_yielding_no_reranker() {
        // The default (Off) ⇒ None, so retrieval behaves exactly as before. This
        // is what keeps the baseline tests green.
        assert!(build_reranker(&cfg(RerankMode::Off, Some("sk-test"))).is_none());
        assert!(build_reranker(&cfg(RerankMode::default(), None)).is_none());
    }

    #[test]
    fn gateway_mode_with_key_selects_a_reranker() {
        // gateway + key ⇒ Some (the real GatewayReranker — no network call is made
        // at construction, so this is a pure selection assertion).
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some("sk-test"))).is_some());
    }

    #[test]
    fn gateway_mode_without_key_falls_back_to_lexical() {
        // gateway requested but no key ⇒ still Some (the offline LexicalReranker),
        // never None and never an unauthenticated gateway call.
        assert!(build_reranker(&cfg(RerankMode::Gateway, None)).is_some());
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some(""))).is_some());
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some(" "))).is_some());
    }

    #[test]
    fn lexical_mode_selects_a_reranker_without_a_key() {
        assert!(build_reranker(&cfg(RerankMode::Lexical, None)).is_some());
    }

    #[test]
    fn rerank_mode_parse() {
        assert_eq!(RerankMode::parse("gateway"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("ON"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("lexical"), RerankMode::Lexical);
        assert_eq!(RerankMode::parse("off"), RerankMode::Off);
        assert_eq!(RerankMode::parse(""), RerankMode::Off);
        assert_eq!(RerankMode::parse("nonsense"), RerankMode::Off);
    }

    #[test]
    fn rerank_mode_parse_aliases_and_whitespace() {
        // The remaining "enable" spellings accepted by `parse`.
        assert_eq!(RerankMode::parse("1"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("true"), RerankMode::Gateway);
        // Surrounding whitespace and ASCII case are ignored.
        assert_eq!(RerankMode::parse("  Gateway\n"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("\tLEXICAL "), RerankMode::Lexical);
        // Falsy or blank values are not special-cased; they land on Off.
        assert_eq!(RerankMode::parse("0"), RerankMode::Off);
        assert_eq!(RerankMode::parse("false"), RerankMode::Off);
        assert_eq!(RerankMode::parse("   "), RerankMode::Off);
    }
}
211}