Skip to main content

smooth_operator_server/
reranker.rs

1//! Reranker selection — the opt-in post-retrieval reorder stage (feature gap G8).
2//!
3//! Hybrid retrieval (dense ∪ sparse → RRF) gives a good rank-ordered top-K, but
4//! the fusion score is a *rank* signal, not a sharp relevance score against the
5//! query. A reranker reorders that candidate set with a cross-encoder before it
6//! reaches the model. Unlike the embedder — which is *required* for dense
7//! retrieval to work at all — the reranker is **opt-in**: the default is the
8//! identity [`NoopReranker`], so wiring the selector in never changes existing
9//! behavior.
10//!
11//! There are three implementations of the
12//! [`Reranker`](smooth_operator::rerank::Reranker) trait in this workspace:
13//!
14//! | Reranker            | Network | When                                                       |
15//! | ------------------- | ------- | ---------------------------------------------------------- |
16//! | [`GatewayReranker`] | yes     | **Production.** Cohere/Voyage `/v1/rerank` over the gateway. |
17//! | [`LexicalReranker`] | no      | Offline deterministic reorder (BM25-ish lexical overlap).  |
18//! | [`NoopReranker`]    | no      | **Default.** Identity — rerank is off, order unchanged.    |
19//!
//! [`build_reranker`] picks from configuration, mirroring
//! [`build_embedder`](crate::embedder::build_embedder). The choice is driven by
//! `SMOOTH_AGENT_RERANK` (see [`RerankMode`]), not by the gateway key alone:
//!
//! - **`gateway` + a non-blank gateway key** ⇒ the real [`GatewayReranker`], the
//!   production semantic reorder. Logs a [`tracing::info!`]. Without a key (or on
//!   a lean build without the `postgres` feature) it falls back to the
//!   [`LexicalReranker`] and logs a [`tracing::warn!`].
//! - **`lexical`** ⇒ the network-free [`LexicalReranker`] for an offline reorder,
//!   with or without a key.
//! - **Off (the default: unset, empty, or unrecognized)** ⇒ no reranker (`None`) —
//!   rerank is off, so the 271-test baseline (and default behavior) is
//!   byte-for-byte unchanged. Logs a [`tracing::info!`] so an operator can see
//!   rerank is disabled.
30
31use std::sync::Arc;
32
33use smooth_operator::rerank::{LexicalReranker, NoopReranker, Reranker};
34#[cfg(feature = "postgres")]
35use smooth_operator_adapter_postgres::GatewayReranker;
36
/// The default rerank model id on the lean (`--no-default-features`) build.
///
/// That build has no postgres adapter crate to re-export the constant from, so
/// it is defined here; this keeps the constant — and any `RerankerConfig` that
/// defaults to it — resolvable. It is expected to match the adapter's value.
#[cfg(not(feature = "postgres"))]
pub const DEFAULT_RERANK_MODEL: &str = "rerank-english-v3.0";
/// The default rerank model id, re-exported from the postgres adapter on the
/// default (cloud) build.
#[cfg(feature = "postgres")]
pub use smooth_operator_adapter_postgres::DEFAULT_RERANK_MODEL;
45
46/// Inputs the reranker selector needs. A small struct (rather than the whole
47/// [`ServerConfig`](crate::config::ServerConfig)) so other callers can build the
48/// same selector. Mirrors [`EmbedderConfig`](crate::embedder::EmbedderConfig).
49#[derive(Debug, Clone)]
50pub struct RerankerConfig {
51    /// The gateway base URL (e.g. `https://llm.smoo.ai/v1`).
52    pub gateway_url: String,
53    /// The gateway API key. `Some` ⇒ the real [`GatewayReranker`] is eligible.
54    pub gateway_key: Option<String>,
55    /// The rerank model id (e.g. `rerank-english-v3.0`).
56    pub model: String,
57    /// Whether the rerank stage is enabled at all. When `false` (the default), the
58    /// selector returns the identity [`NoopReranker`] regardless of the key, so
59    /// default behavior is unchanged. Driven by `SMOOTH_AGENT_RERANK`.
60    pub mode: RerankMode,
61}
62
/// The rerank stage requested by the operator.
///
/// [`Off`](Self::Off) is the `Default`, which is what keeps reranking opt-in.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum RerankMode {
    /// No rerank stage — retrieval order is left as-is (default).
    #[default]
    Off,
    /// Gateway cross-encoder when a key (and the gateway implementation) is
    /// available; otherwise the selector falls back to the offline lexical
    /// reranker.
    Gateway,
    /// Always the offline, deterministic [`LexicalReranker`] (no network).
    Lexical,
}

impl RerankMode {
    /// Interpret a `SMOOTH_AGENT_RERANK` value.
    ///
    /// Surrounding whitespace is ignored and matching is ASCII
    /// case-insensitive. `gateway`, `on`, `1` and `true` select
    /// [`Gateway`](Self::Gateway); `lexical` selects [`Lexical`](Self::Lexical);
    /// anything else — including the empty string — is [`Off`](Self::Off).
    #[must_use]
    pub fn parse(s: &str) -> Self {
        let value = s.trim();
        let is = |candidate: &str| value.eq_ignore_ascii_case(candidate);

        if is("gateway") || is("on") || is("1") || is("true") {
            Self::Gateway
        } else if is("lexical") {
            Self::Lexical
        } else {
            Self::Off
        }
    }
}
87
88impl RerankerConfig {
89    /// Read the rerank mode from `SMOOTH_AGENT_RERANK` (unset ⇒ [`Off`](RerankMode::Off)).
90    #[must_use]
91    pub fn mode_from_env() -> RerankMode {
92        std::env::var("SMOOTH_AGENT_RERANK")
93            .ok()
94            .map(|s| RerankMode::parse(&s))
95            .unwrap_or_default()
96    }
97
98    /// Build from the gateway parts + `SMOOTH_AGENT_RERANK`, defaulting the rerank
99    /// model. The shared constructor so both the reference server's `ServerConfig`
100    /// and the lambda's `LambdaConfig` select rerank identically.
101    #[must_use]
102    pub fn from_gateway(gateway_url: impl Into<String>, gateway_key: Option<String>) -> Self {
103        Self {
104            gateway_url: gateway_url.into(),
105            gateway_key,
106            model: DEFAULT_RERANK_MODEL.to_string(),
107            mode: Self::mode_from_env(),
108        }
109    }
110
111    /// Build from the server config + `SMOOTH_AGENT_RERANK`, defaulting the rerank
112    /// model.
113    #[must_use]
114    pub fn from_server_config(config: &crate::config::ServerConfig) -> Self {
115        Self::from_gateway(config.gateway_url.clone(), config.gateway_key.clone())
116    }
117}
118
119/// Select the reranker for the retrieval path from configuration.
120///
121/// Returns `None` when rerank is disabled (the default), which the retrieval path
122/// treats as "don't reorder" — keeping default behavior byte-for-byte unchanged.
123/// Returns `Some(reranker)` only when explicitly enabled via `SMOOTH_AGENT_RERANK`:
124///
125/// - `gateway` + a gateway key ⇒ the real [`GatewayReranker`] (production).
126/// - `gateway` without a key, or `lexical` ⇒ the offline [`LexicalReranker`].
127/// - `off` / unset ⇒ `None` (no rerank).
128#[must_use]
129pub fn build_reranker(config: &RerankerConfig) -> Option<Arc<dyn Reranker>> {
130    match config.mode {
131        RerankMode::Off => {
132            tracing::info!("rerank stage disabled (default) — retrieval order unchanged");
133            None
134        }
135        RerankMode::Gateway => match &config.gateway_key {
136            // The real GatewayReranker lives in the postgres adapter crate, so
137            // it's only available on a build with the `postgres` feature (the
138            // default / cloud build). On a lean `--no-default-features` build this
139            // arm is compiled out and gateway mode falls back to the offline
140            // LexicalReranker below regardless of the key.
141            #[cfg(feature = "postgres")]
142            Some(key) if !key.trim().is_empty() => {
143                tracing::info!(
144                    model = %config.model,
145                    "using GatewayReranker (cross-encoder /v1/rerank) for retrieval reorder"
146                );
147                Some(Arc::new(GatewayReranker::new(
148                    config.gateway_url.clone(),
149                    key.clone(),
150                    config.model.clone(),
151                )))
152            }
153            _ => {
154                tracing::warn!(
155                    "SMOOTH_AGENT_RERANK=gateway but no GatewayReranker available \
156                     (no gateway key, or a lean build without the `postgres` feature) — \
157                     falling back to the offline LexicalReranker"
158                );
159                Some(Arc::new(LexicalReranker::new()))
160            }
161        },
162        RerankMode::Lexical => {
163            tracing::info!(
164                "using offline LexicalReranker (BM25-ish, no network) for retrieval reorder"
165            );
166            Some(Arc::new(LexicalReranker::new()))
167        }
168    }
169}
170
171/// The identity reranker. Exposed so callers that want an explicit no-op (rather
172/// than `None`) can construct one without importing the core crate directly.
173#[must_use]
174pub fn noop_reranker() -> Arc<dyn Reranker> {
175    Arc::new(NoopReranker)
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a config with a fixed test URL and the default model, varying only
    /// the mode and the (optional) gateway key.
    fn cfg(mode: RerankMode, key: Option<&str>) -> RerankerConfig {
        RerankerConfig {
            gateway_url: "https://example.test/v1".into(),
            gateway_key: key.map(str::to_string),
            model: DEFAULT_RERANK_MODEL.to_string(),
            mode,
        }
    }

    #[test]
    fn default_mode_is_off_yielding_no_reranker() {
        // The default (Off) ⇒ None, so retrieval behaves exactly as before. This
        // is what keeps the baseline tests green.
        assert!(build_reranker(&cfg(RerankMode::Off, Some("sk-test"))).is_none());
        assert!(build_reranker(&cfg(RerankMode::default(), None)).is_none());
    }

    #[test]
    fn gateway_mode_with_key_selects_a_reranker() {
        // gateway + key ⇒ Some. On the default build that is the real
        // GatewayReranker (no network call is made at construction, so this is a
        // pure selection assertion); on a lean build it is the lexical fallback.
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some("sk-test"))).is_some());
        // A key padded with whitespace (e.g. a trailing newline from an env
        // file) is still a usable key.
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some(" sk-test\n"))).is_some());
    }

    #[test]
    fn gateway_mode_without_key_falls_back_to_lexical() {
        // gateway requested but no key ⇒ still Some (the offline LexicalReranker),
        // never None and never an unauthenticated gateway call.
        assert!(build_reranker(&cfg(RerankMode::Gateway, None)).is_some());
        assert!(build_reranker(&cfg(RerankMode::Gateway, Some("  "))).is_some());
    }

    #[test]
    fn lexical_mode_selects_a_reranker_without_a_key() {
        assert!(build_reranker(&cfg(RerankMode::Lexical, None)).is_some());
    }

    #[test]
    fn rerank_mode_parse() {
        assert_eq!(RerankMode::parse("gateway"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("ON"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("lexical"), RerankMode::Lexical);
        assert_eq!(RerankMode::parse("off"), RerankMode::Off);
        assert_eq!(RerankMode::parse(""), RerankMode::Off);
        assert_eq!(RerankMode::parse("nonsense"), RerankMode::Off);
    }

    #[test]
    fn rerank_mode_parse_aliases_and_normalization() {
        // Every documented "enabled" alias maps to Gateway.
        assert_eq!(RerankMode::parse("on"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("1"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse("true"), RerankMode::Gateway);
        // Surrounding whitespace and ASCII case are ignored.
        assert_eq!(RerankMode::parse("  Gateway\n"), RerankMode::Gateway);
        assert_eq!(RerankMode::parse(" LEXICAL "), RerankMode::Lexical);
        // "Disabled" spellings are not special-cased; they fall through to Off.
        assert_eq!(RerankMode::parse("0"), RerankMode::Off);
        assert_eq!(RerankMode::parse("false"), RerankMode::Off);
    }
}