smooth_operator_server/reranker.rs
1//! Reranker selection — the opt-in post-retrieval reorder stage (feature gap G8).
2//!
//! Hybrid retrieval (dense ∪ sparse → RRF) gives a good rank-ordered top-K, but
//! the fusion score is a *rank* signal, not a sharp relevance score against the
//! query. A reranker reorders that candidate set with a cross-encoder before it
//! reaches the model. Unlike the embedder — which is *required* for dense
//! retrieval to work at all — the reranker is **opt-in**: by default
//! [`build_reranker`] returns `None` (no reorder at all, the same outcome as the
//! identity [`NoopReranker`]), so wiring the selector in never changes existing
//! behavior.
10//!
11//! There are three implementations of the
12//! [`Reranker`](smooth_operator::rerank::Reranker) trait in this workspace:
13//!
14//! | Reranker | Network | When |
15//! | ------------------- | ------- | ---------------------------------------------------------- |
16//! | [`GatewayReranker`] | yes | **Production.** Cohere/Voyage `/v1/rerank` over the gateway. |
17//! | [`LexicalReranker`] | no | Offline deterministic reorder (BM25-ish lexical overlap). |
//! | [`NoopReranker`] | no | Identity — order unchanged. Not selected by [`build_reranker`] (rerank off ⇒ `None`); available via [`noop_reranker`]. |
19//!
20//! [`build_reranker`] picks from configuration, mirroring
21//! [`build_embedder`](crate::embedder::build_embedder):
22//!
//! - **`SMOOTH_AGENT_RERANK=gateway` + a non-blank gateway key** (on a build with
//!   the `postgres` feature) ⇒ the real [`GatewayReranker`], the production
//!   semantic reorder. Logs a [`tracing::info!`].
//! - **`SMOOTH_AGENT_RERANK=lexical`**, or `gateway` without a usable key / on a
//!   lean build without the `postgres` feature ⇒ the network-free
//!   [`LexicalReranker`] for an offline reorder (the gateway fallback logs a
//!   [`tracing::warn!`]).
//! - **Unset / `off` (default)** ⇒ `None`, i.e. no reranker at all, regardless of
//!   the key — rerank is off, so the 271-test baseline (and default behavior) is
//!   byte-for-byte unchanged. Logs a [`tracing::info!`] so an operator can see
//!   rerank is disabled.

31use std::sync::Arc;
32
33use smooth_operator::rerank::{LexicalReranker, NoopReranker, Reranker};
34#[cfg(feature = "postgres")]
35use smooth_operator_adapter_postgres::GatewayReranker;
36
/// The default rerank model id — lean-build definition.
///
/// Used when the crate is built without the `postgres` feature, so the
/// constant (and any `RerankerConfig` that defaults to it) still resolves
/// without the postgres adapter crate. Expected to stay in sync with the
/// adapter's constant of the same name.
#[cfg(not(feature = "postgres"))]
pub const DEFAULT_RERANK_MODEL: &str = "rerank-english-v3.0";
/// The default rerank model id — re-exported from the postgres adapter on the
/// default (cloud) build, so both builds expose the same name from this module.
#[cfg(feature = "postgres")]
pub use smooth_operator_adapter_postgres::DEFAULT_RERANK_MODEL;
45
46/// Inputs the reranker selector needs. A small struct (rather than the whole
47/// [`ServerConfig`](crate::config::ServerConfig)) so other callers can build the
48/// same selector. Mirrors [`EmbedderConfig`](crate::embedder::EmbedderConfig).
49#[derive(Debug, Clone)]
50pub struct RerankerConfig {
51 /// The gateway base URL (e.g. `https://llm.smoo.ai/v1`).
52 pub gateway_url: String,
53 /// The gateway API key. `Some` ⇒ the real [`GatewayReranker`] is eligible.
54 pub gateway_key: Option<String>,
55 /// The rerank model id (e.g. `rerank-english-v3.0`).
56 pub model: String,
57 /// Whether the rerank stage is enabled at all. When `false` (the default), the
58 /// selector returns the identity [`NoopReranker`] regardless of the key, so
59 /// default behavior is unchanged. Driven by `SMOOTH_AGENT_RERANK`.
60 pub mode: RerankMode,
61}
62
/// The rerank stage an operator asked for via `SMOOTH_AGENT_RERANK`.
///
/// [`Off`](Self::Off) is the `Default`, which keeps reranking strictly opt-in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RerankMode {
    /// No rerank stage at all (the default).
    #[default]
    Off,
    /// Use the gateway cross-encoder when one is available; otherwise the
    /// selector falls back to the offline lexical reranker.
    Gateway,
    /// Always use the offline, deterministic [`LexicalReranker`] (no network).
    Lexical,
}

impl RerankMode {
    /// Interpret a raw `SMOOTH_AGENT_RERANK` value.
    ///
    /// Surrounding whitespace is ignored and matching is ASCII
    /// case-insensitive. `gateway`, `on`, `1` and `true` select
    /// [`Gateway`](Self::Gateway); `lexical` selects [`Lexical`](Self::Lexical);
    /// anything else — including the empty string — is [`Off`](Self::Off).
    #[must_use]
    pub fn parse(s: &str) -> Self {
        // Every spelling that switches the gateway reranker on.
        const GATEWAY_SPELLINGS: [&str; 4] = ["gateway", "on", "1", "true"];

        let token = s.trim();
        let wants_gateway = GATEWAY_SPELLINGS
            .iter()
            .any(|spelling| token.eq_ignore_ascii_case(spelling));

        if wants_gateway {
            Self::Gateway
        } else if token.eq_ignore_ascii_case("lexical") {
            Self::Lexical
        } else {
            Self::Off
        }
    }
}
87
88impl RerankerConfig {
89 /// Read the rerank mode from `SMOOTH_AGENT_RERANK` (unset ⇒ [`Off`](RerankMode::Off)).
90 #[must_use]
91 pub fn mode_from_env() -> RerankMode {
92 std::env::var("SMOOTH_AGENT_RERANK")
93 .ok()
94 .map(|s| RerankMode::parse(&s))
95 .unwrap_or_default()
96 }
97
98 /// Build from the gateway parts + `SMOOTH_AGENT_RERANK`, defaulting the rerank
99 /// model. The shared constructor so both the reference server's `ServerConfig`
100 /// and the lambda's `LambdaConfig` select rerank identically.
101 #[must_use]
102 pub fn from_gateway(gateway_url: impl Into<String>, gateway_key: Option<String>) -> Self {
103 Self {
104 gateway_url: gateway_url.into(),
105 gateway_key,
106 model: DEFAULT_RERANK_MODEL.to_string(),
107 mode: Self::mode_from_env(),
108 }
109 }
110
111 /// Build from the server config + `SMOOTH_AGENT_RERANK`, defaulting the rerank
112 /// model.
113 #[must_use]
114 pub fn from_server_config(config: &crate::config::ServerConfig) -> Self {
115 Self::from_gateway(config.gateway_url.clone(), config.gateway_key.clone())
116 }
117}
118
119/// Select the reranker for the retrieval path from configuration.
120///
121/// Returns `None` when rerank is disabled (the default), which the retrieval path
122/// treats as "don't reorder" — keeping default behavior byte-for-byte unchanged.
123/// Returns `Some(reranker)` only when explicitly enabled via `SMOOTH_AGENT_RERANK`:
124///
125/// - `gateway` + a gateway key ⇒ the real [`GatewayReranker`] (production).
126/// - `gateway` without a key, or `lexical` ⇒ the offline [`LexicalReranker`].
127/// - `off` / unset ⇒ `None` (no rerank).
128#[must_use]
129pub fn build_reranker(config: &RerankerConfig) -> Option<Arc<dyn Reranker>> {
130 match config.mode {
131 RerankMode::Off => {
132 tracing::info!("rerank stage disabled (default) — retrieval order unchanged");
133 None
134 }
135 RerankMode::Gateway => match &config.gateway_key {
136 // The real GatewayReranker lives in the postgres adapter crate, so
137 // it's only available on a build with the `postgres` feature (the
138 // default / cloud build). On a lean `--no-default-features` build this
139 // arm is compiled out and gateway mode falls back to the offline
140 // LexicalReranker below regardless of the key.
141 #[cfg(feature = "postgres")]
142 Some(key) if !key.trim().is_empty() => {
143 tracing::info!(
144 model = %config.model,
145 "using GatewayReranker (cross-encoder /v1/rerank) for retrieval reorder"
146 );
147 Some(Arc::new(GatewayReranker::new(
148 config.gateway_url.clone(),
149 key.clone(),
150 config.model.clone(),
151 )))
152 }
153 _ => {
154 tracing::warn!(
155 "SMOOTH_AGENT_RERANK=gateway but no GatewayReranker available \
156 (no gateway key, or a lean build without the `postgres` feature) — \
157 falling back to the offline LexicalReranker"
158 );
159 Some(Arc::new(LexicalReranker::new()))
160 }
161 },
162 RerankMode::Lexical => {
163 tracing::info!(
164 "using offline LexicalReranker (BM25-ish, no network) for retrieval reorder"
165 );
166 Some(Arc::new(LexicalReranker::new()))
167 }
168 }
169}
170
171/// The identity reranker. Exposed so callers that want an explicit no-op (rather
172/// than `None`) can construct one without importing the core crate directly.
173#[must_use]
174pub fn noop_reranker() -> Arc<dyn Reranker> {
175 Arc::new(NoopReranker)
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a config for `mode` with an optional gateway key, a fixed test
    /// gateway URL, and the default rerank model.
    fn cfg(mode: RerankMode, key: Option<&str>) -> RerankerConfig {
        let gateway_key = key.map(str::to_string);
        RerankerConfig {
            gateway_url: "https://example.test/v1".into(),
            gateway_key,
            model: DEFAULT_RERANK_MODEL.to_string(),
            mode,
        }
    }

    #[test]
    fn default_mode_is_off_yielding_no_reranker() {
        // Off (which is also the default) must produce no reranker, key or no
        // key — that is what leaves baseline retrieval untouched.
        let keyed_but_off = cfg(RerankMode::Off, Some("sk-test"));
        assert!(build_reranker(&keyed_but_off).is_none());

        let unkeyed_default = cfg(RerankMode::default(), None);
        assert!(build_reranker(&unkeyed_default).is_none());
    }

    #[test]
    fn gateway_mode_with_key_selects_a_reranker() {
        // Pure selection check: gateway mode plus a key yields a reranker, and
        // constructing it performs no network call.
        let keyed_gateway = cfg(RerankMode::Gateway, Some("sk-test"));
        assert!(build_reranker(&keyed_gateway).is_some());
    }

    #[test]
    fn gateway_mode_without_key_falls_back_to_lexical() {
        // A missing or blank key must still produce a reranker (the offline
        // fallback) — never None, and never an unauthenticated gateway call.
        let missing_key = cfg(RerankMode::Gateway, None);
        assert!(build_reranker(&missing_key).is_some());

        let blank_key = cfg(RerankMode::Gateway, Some(" "));
        assert!(build_reranker(&blank_key).is_some());
    }

    #[test]
    fn lexical_mode_selects_a_reranker_without_a_key() {
        let lexical = cfg(RerankMode::Lexical, None);
        assert!(build_reranker(&lexical).is_some());
    }

    #[test]
    fn rerank_mode_parse() {
        let cases = [
            ("gateway", RerankMode::Gateway),
            ("ON", RerankMode::Gateway),
            ("lexical", RerankMode::Lexical),
            ("off", RerankMode::Off),
            ("", RerankMode::Off),
            ("nonsense", RerankMode::Off),
        ];
        for (raw, expected) in cases {
            assert_eq!(RerankMode::parse(raw), expected, "input: {:?}", raw);
        }
    }
}