Skip to main content

smooth_operator_server/
reranker.rs

1//! Reranker selection — the opt-in post-retrieval reorder stage (feature gap G8).
2//!
3//! Hybrid retrieval (dense ∪ sparse → RRF) gives a good rank-ordered top-K, but
4//! the fusion score is a *rank* signal, not a sharp relevance score against the
5//! query. A reranker reorders that candidate set with a cross-encoder before it
6//! reaches the model. Unlike the embedder — which is *required* for dense
7//! retrieval to work at all — the reranker is **opt-in**: the default is the
8//! identity [`NoopReranker`], so wiring the selector in never changes existing
9//! behavior.
10//!
11//! There are three implementations of the
12//! [`Reranker`](smooth_operator::rerank::Reranker) trait in this workspace:
13//!
14//! | Reranker            | Network | When                                                       |
15//! | ------------------- | ------- | ---------------------------------------------------------- |
16//! | [`GatewayReranker`] | yes     | **Production.** Cohere/Voyage `/v1/rerank` over the gateway. |
17//! | [`LexicalReranker`] | no      | Offline deterministic reorder (BM25-ish lexical overlap).  |
18//! | [`NoopReranker`]    | no      | **Default.** Identity — rerank is off, order unchanged.    |
19//!
//! [`build_reranker`] picks from configuration, mirroring
//! [`build_embedder`](crate::embedder::build_embedder), with the
//! `SMOOTH_AGENT_RERANK` mode deciding first and the gateway key second:
//!
//! - **`gateway` + a non-blank gateway key** ⇒ the real [`GatewayReranker`], the
//!   production semantic reorder. Logs a [`tracing::info!`].
//! - **`gateway` without a usable key, or `lexical`** ⇒ the network-free
//!   [`LexicalReranker`] for an offline reorder. The keyless `gateway` fallback
//!   logs a [`tracing::warn!`].
//! - **`off` / unset (default)** ⇒ no reranker at all (`None`) — rerank is off, so
//!   the 271-test baseline (and default behavior) is byte-for-byte unchanged. Logs
//!   a [`tracing::info!`] so an operator can see rerank is disabled.
30
31use std::sync::Arc;
32
33use smooth_operator::rerank::{LexicalReranker, NoopReranker, Reranker};
34use smooth_operator_adapter_postgres::{GatewayReranker, DEFAULT_RERANK_MODEL};
35
36/// Inputs the reranker selector needs. A small struct (rather than the whole
37/// [`ServerConfig`](crate::config::ServerConfig)) so other callers can build the
38/// same selector. Mirrors [`EmbedderConfig`](crate::embedder::EmbedderConfig).
39#[derive(Debug, Clone)]
40pub struct RerankerConfig {
41    /// The gateway base URL (e.g. `https://llm.smoo.ai/v1`).
42    pub gateway_url: String,
43    /// The gateway API key. `Some` ⇒ the real [`GatewayReranker`] is eligible.
44    pub gateway_key: Option<String>,
45    /// The rerank model id (e.g. `rerank-english-v3.0`).
46    pub model: String,
47    /// Whether the rerank stage is enabled at all. When `false` (the default), the
48    /// selector returns the identity [`NoopReranker`] regardless of the key, so
49    /// default behavior is unchanged. Driven by `SMOOTH_AGENT_RERANK`.
50    pub mode: RerankMode,
51}
52
/// The rerank stage an operator has selected through `SMOOTH_AGENT_RERANK`.
///
/// Reranking is opt-in, which is why [`Off`](Self::Off) is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RerankMode {
    /// No rerank stage at all (default).
    #[default]
    Off,
    /// Gateway cross-encoder when a usable key is configured; otherwise the
    /// selector falls back to the offline lexical reranker.
    Gateway,
    /// Always the offline deterministic [`LexicalReranker`] (no network).
    Lexical,
}

impl RerankMode {
    /// Interpret a raw `SMOOTH_AGENT_RERANK` value.
    ///
    /// Surrounding whitespace and ASCII letter case are ignored:
    ///
    /// - `gateway`, `on`, `1`, `true` ⇒ [`Gateway`](Self::Gateway)
    /// - `lexical` ⇒ [`Lexical`](Self::Lexical)
    /// - anything else, including the empty string ⇒ [`Off`](Self::Off)
    #[must_use]
    pub fn parse(s: &str) -> Self {
        // Every spelling that switches the gateway stage on.
        const GATEWAY_ALIASES: [&str; 4] = ["gateway", "on", "1", "true"];

        let value = s.trim();
        if GATEWAY_ALIASES
            .iter()
            .any(|alias| value.eq_ignore_ascii_case(alias))
        {
            Self::Gateway
        } else if value.eq_ignore_ascii_case("lexical") {
            Self::Lexical
        } else {
            Self::Off
        }
    }
}
77
78impl RerankerConfig {
79    /// Read the rerank mode from `SMOOTH_AGENT_RERANK` (unset ⇒ [`Off`](RerankMode::Off)).
80    #[must_use]
81    pub fn mode_from_env() -> RerankMode {
82        std::env::var("SMOOTH_AGENT_RERANK")
83            .ok()
84            .map(|s| RerankMode::parse(&s))
85            .unwrap_or_default()
86    }
87
88    /// Build from the gateway parts + `SMOOTH_AGENT_RERANK`, defaulting the rerank
89    /// model. The shared constructor so both the reference server's `ServerConfig`
90    /// and the lambda's `LambdaConfig` select rerank identically.
91    #[must_use]
92    pub fn from_gateway(gateway_url: impl Into<String>, gateway_key: Option<String>) -> Self {
93        Self {
94            gateway_url: gateway_url.into(),
95            gateway_key,
96            model: DEFAULT_RERANK_MODEL.to_string(),
97            mode: Self::mode_from_env(),
98        }
99    }
100
101    /// Build from the server config + `SMOOTH_AGENT_RERANK`, defaulting the rerank
102    /// model.
103    #[must_use]
104    pub fn from_server_config(config: &crate::config::ServerConfig) -> Self {
105        Self::from_gateway(config.gateway_url.clone(), config.gateway_key.clone())
106    }
107}
108
109/// Select the reranker for the retrieval path from configuration.
110///
111/// Returns `None` when rerank is disabled (the default), which the retrieval path
112/// treats as "don't reorder" — keeping default behavior byte-for-byte unchanged.
113/// Returns `Some(reranker)` only when explicitly enabled via `SMOOTH_AGENT_RERANK`:
114///
115/// - `gateway` + a gateway key ⇒ the real [`GatewayReranker`] (production).
116/// - `gateway` without a key, or `lexical` ⇒ the offline [`LexicalReranker`].
117/// - `off` / unset ⇒ `None` (no rerank).
118#[must_use]
119pub fn build_reranker(config: &RerankerConfig) -> Option<Arc<dyn Reranker>> {
120    match config.mode {
121        RerankMode::Off => {
122            tracing::info!("rerank stage disabled (default) — retrieval order unchanged");
123            None
124        }
125        RerankMode::Gateway => match &config.gateway_key {
126            Some(key) if !key.trim().is_empty() => {
127                tracing::info!(
128                    model = %config.model,
129                    "using GatewayReranker (cross-encoder /v1/rerank) for retrieval reorder"
130                );
131                Some(Arc::new(GatewayReranker::new(
132                    config.gateway_url.clone(),
133                    key.clone(),
134                    config.model.clone(),
135                )))
136            }
137            _ => {
138                tracing::warn!(
139                    "SMOOTH_AGENT_RERANK=gateway but no gateway key — \
140                     falling back to the offline LexicalReranker"
141                );
142                Some(Arc::new(LexicalReranker::new()))
143            }
144        },
145        RerankMode::Lexical => {
146            tracing::info!(
147                "using offline LexicalReranker (BM25-ish, no network) for retrieval reorder"
148            );
149            Some(Arc::new(LexicalReranker::new()))
150        }
151    }
152}
153
154/// The identity reranker. Exposed so callers that want an explicit no-op (rather
155/// than `None`) can construct one without importing the core crate directly.
156#[must_use]
157pub fn noop_reranker() -> Arc<dyn Reranker> {
158    Arc::new(NoopReranker)
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a selector config with a fixed test URL and the default model,
    /// varying only the mode and the (optional) gateway key.
    fn cfg(mode: RerankMode, key: Option<&str>) -> RerankerConfig {
        RerankerConfig {
            gateway_url: "https://example.test/v1".into(),
            gateway_key: key.map(str::to_string),
            model: DEFAULT_RERANK_MODEL.to_string(),
            mode,
        }
    }

    #[test]
    fn default_mode_is_off_yielding_no_reranker() {
        // The default (Off) ⇒ None, so retrieval behaves exactly as before. This
        // is what keeps the baseline tests green. A key alone must not enable it.
        assert!(build_reranker(&cfg(RerankMode::Off, Some("sk-test"))).is_none());
        assert!(build_reranker(&cfg(RerankMode::default(), None)).is_none());
    }

    #[test]
    fn gateway_mode_with_key_selects_a_reranker() {
        // gateway + key ⇒ Some (the real GatewayReranker — no network call is made
        // at construction, so this is a pure selection assertion).
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some("sk-test"))).is_some());
    }

    #[test]
    fn gateway_mode_without_key_falls_back_to_lexical() {
        // gateway requested but no key ⇒ still Some (the offline LexicalReranker),
        // never None and never an unauthenticated gateway call. A whitespace-only
        // key counts as no key.
        assert!(build_reranker(&cfg(RerankMode::Gateway, None)).is_some());
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some("  "))).is_some());
    }

    #[test]
    fn lexical_mode_selects_a_reranker_without_a_key() {
        assert!(build_reranker(&cfg(RerankMode::Lexical, None)).is_some());
    }

    #[test]
    fn rerank_mode_parse() {
        // Every documented spelling that enables the gateway stage.
        assert_eq!(RerankMode::parse("gateway"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("ON"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("1"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("true"), RerankMode::Gateway);

        // Surrounding whitespace and ASCII case are ignored.
        assert_eq!(RerankMode::parse("  Gateway\n"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("lexical"), RerankMode::Lexical);
        assert_eq!(RerankMode::parse("LEXICAL"), RerankMode::Lexical);

        // Everything else — explicit "off" spellings, empty, garbage — is Off.
        assert_eq!(RerankMode::parse("off"), RerankMode::Off);
        assert_eq!(RerankMode::parse("0"), RerankMode::Off);
        assert_eq!(RerankMode::parse("false"), RerankMode::Off);
        assert_eq!(RerankMode::parse(""), RerankMode::Off);
        assert_eq!(RerankMode::parse("nonsense"), RerankMode::Off);
    }
}