Skip to main content

mnm_core/
rerank.rs

1//! Shared reranking vocabulary for the `VoyageAI` reranking design.
2//!
3//! Used identically by the server's inline rerank stage and by clients
4//! reranking locally (BYOK), so the same search reranks the same way
5//! regardless of placement.
6
7use serde::{Deserialize, Serialize};
8
/// Upper bound, counted in characters, on a rerank instruction supplied by an agent.
///
/// Voyage's token formula is `query_tokens × num_documents`, so the
/// instruction's length is multiplied by the candidate-pool size. Capping the
/// length therefore caps cost directly.
pub const MAX_INSTRUCTION_CHARS: usize = 400;
14
15/// The `rerank` request parameter: which Voyage model to rerank with, or none.
16///
17/// Omitting the parameter defaults to the full model (`rerank-2.5`). Clients
18/// reranking locally always send `none` (one rerank pass, structurally).
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
20pub enum RerankParam {
21    /// `VoyageAI` `rerank-2.5` (the default; full quality).
22    #[default]
23    #[serde(rename = "rerank-2.5")]
24    Rerank25,
25    /// `VoyageAI` `rerank-2.5-lite` (lower latency; billed at half tokens, D5).
26    #[serde(rename = "rerank-2.5-lite")]
27    Rerank25Lite,
28    /// No server-side reranking (RRF order).
29    #[serde(rename = "none")]
30    None,
31}
32
33impl RerankParam {
34    /// The Voyage model name to call, or `None` when reranking is off.
35    #[must_use]
36    pub const fn model_name(self) -> Option<&'static str> {
37        match self {
38            Self::Rerank25 => Some("rerank-2.5"),
39            Self::Rerank25Lite => Some("rerank-2.5-lite"),
40            Self::None => None,
41        }
42    }
43
44    /// Billed-equivalent tokens for a Voyage-reported `total_tokens` (D5):
45    /// `rerank-2.5-lite` is charged at `ceil(total / 2)` — mirroring Voyage's
46    /// half-rate pricing for lite — everything else at face value.
47    #[must_use]
48    pub const fn billed_tokens(self, total_tokens: u64) -> u64 {
49        match self {
50            Self::Rerank25Lite => total_tokens.div_ceil(2),
51            _ => total_tokens,
52        }
53    }
54}
55
/// Every `search_metadata.rerank.reason` value the server is allowed to emit.
///
/// The server sets one of these when a rerank is not applied (see
/// `midnight-manual-server` `routes::search`). Clients copy the field into
/// the FR-109 `Rerank` telemetry event, so the list has to remain a closed
/// allow-list of known values. Free-form server text would break the
/// telemetry module's privacy-by-construction invariant.
pub const RERANK_REASONS: &[&str] = &[
    "not_requested",
    "token_budget_exhausted",
    "provider_error",
    "disabled",
];
68
69/// Map a raw `search_metadata.rerank.reason` string from a server response to a
70/// known reason wire value, returning `None` for anything outside the closed
71/// [`RERANK_REASONS`] set.
72///
73/// This is the privacy gate on the telemetry path: a client reads the reason
74/// off the (untrusted) server response and would otherwise copy arbitrary text
75/// into a `Rerank` event. Routing it through this allow-list means only the
76/// documented closed set can ever reach the wire — an unrecognized value is
77/// dropped rather than echoed.
78#[must_use]
79pub fn known_reason(raw: &str) -> Option<&'static str> {
80    RERANK_REASONS.iter().copied().find(|&r| r == raw)
81}
82
83/// Validate an agent-supplied instruction against [`MAX_INSTRUCTION_CHARS`].
84///
85/// # Errors
86///
87/// Returns a human-readable message naming the cap when the instruction is too
88/// long (callers reject with 400 / `InvalidInput` — never truncate silently).
89pub fn validate_instruction(instruction: &str) -> Result<(), String> {
90    let n = instruction.chars().count();
91    if n > MAX_INSTRUCTION_CHARS {
92        return Err(format!(
93            "rerank_instructions is {n} characters; the cap is {MAX_INSTRUCTION_CHARS}. \
94             Shorter instructions also cost fewer tokens (the instruction is \
95             multiplied by the candidate-pool size)."
96        ));
97    }
98    Ok(())
99}
100
/// Build the default rerank instruction implied by the request shape (spec §3).
///
/// * `code_exclusive` — whether `code_mode == exclusive`; adds a sentence
///   preferring code over prose.
/// * `version` — the first `language_target` filter's
///   `(name, version_satisfies)` when both are present; adds a sentence
///   preferring that version.
///
/// Returns `None` when neither condition holds, one sentence when exactly one
/// holds, and the code sentence followed by the version sentence (joined by a
/// single space) when both hold.
///
/// Kept deliberately short: each default token is multiplied by ~50 docs per
/// search. An agent-supplied instruction replaces this default wholesale (D4).
#[must_use]
pub fn default_instruction(code_exclusive: bool, version: Option<(&str, &str)>) -> Option<String> {
    const CODE_FOCUS: &str = "Prioritize chunks containing code examples, function signatures, \
                              and API usage over prose.";

    let code_sentence = code_exclusive.then(|| CODE_FOCUS.to_owned());
    let version_sentence = version.map(|(name, ver)| {
        format!("Prefer content applying to {name} version {ver}; deprioritize other versions.")
    });

    match (code_sentence, version_sentence) {
        (None, None) => None,
        (Some(only), None) | (None, Some(only)) => Some(only),
        (Some(code), Some(ver)) => Some(format!("{code} {ver}")),
    }
}
128
/// Build the query string that is sent to Voyage `/v1/rerank`.
///
/// Instructions are NOT an API parameter; Voyage's documented convention is
/// to append or prepend them to the query as natural language. Here the
/// trimmed instruction is appended on a second line labelled `Instructions:`.
/// A missing, empty, or whitespace-only instruction leaves the query as is.
#[must_use]
pub fn compose_rerank_query(query: &str, instruction: Option<&str>) -> String {
    instruction
        .map(str::trim)
        .filter(|text| !text.is_empty())
        .map_or_else(
            || query.to_owned(),
            |text| format!("{query}\nInstructions: {text}"),
        )
}
142
/// Unit tests pinning the wire values, billing rule, instruction cap, default
/// instruction table, reason allow-list, and query composition of this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rerank_param_wire_values_round_trip() {
        for (variant, wire) in [
            (RerankParam::Rerank25, "\"rerank-2.5\""),
            (RerankParam::Rerank25Lite, "\"rerank-2.5-lite\""),
            (RerankParam::None, "\"none\""),
        ] {
            assert_eq!(serde_json::to_string(&variant).unwrap(), wire);
            let back: RerankParam = serde_json::from_str(wire).unwrap();
            assert_eq!(back, variant);
        }
        // Default (omitted on the wire) is the full model.
        assert_eq!(RerankParam::default(), RerankParam::Rerank25);
    }

    #[test]
    fn model_name_is_none_only_for_none() {
        assert_eq!(RerankParam::Rerank25.model_name(), Some("rerank-2.5"));
        assert_eq!(RerankParam::Rerank25Lite.model_name(), Some("rerank-2.5-lite"));
        assert_eq!(RerankParam::None.model_name(), None);
    }

    #[test]
    fn lite_bills_half_rounded_up() {
        // D5: lite charges ceil(total/2); the full model charges face value.
        assert_eq!(RerankParam::Rerank25.billed_tokens(1001), 1001);
        assert_eq!(RerankParam::Rerank25Lite.billed_tokens(1000), 500);
        assert_eq!(RerankParam::Rerank25Lite.billed_tokens(1001), 501);
        assert_eq!(RerankParam::Rerank25Lite.billed_tokens(0), 0);
        assert_eq!(RerankParam::Rerank25Lite.billed_tokens(1), 1);
        // None never reaches billing, but must not panic.
        assert_eq!(RerankParam::None.billed_tokens(10), 10);
    }

    #[test]
    fn instruction_cap_is_400_chars() {
        assert!(validate_instruction(&"x".repeat(400)).is_ok());
        let err = validate_instruction(&"x".repeat(401)).unwrap_err();
        assert!(err.contains("400"), "error should name the cap: {err}");
        // Cap counts chars, not bytes: 400 two-byte chars (800 bytes) pass...
        assert!(validate_instruction(&"é".repeat(400)).is_ok());
        // ...and the 401st char is rejected, whatever the byte length.
        assert!(validate_instruction(&"é".repeat(401)).is_err());
    }

    #[test]
    fn default_instruction_rule_table() {
        // No condition -> bare query (None).
        assert_eq!(default_instruction(false, None), None);
        // code_mode exclusive -> code-focused instruction.
        let code = default_instruction(true, None).unwrap();
        assert!(code.contains("code examples"));
        // Version filter -> version preference, naming language + version.
        let ver = default_instruction(false, Some(("compact", "0.31"))).unwrap();
        assert!(ver.contains("compact") && ver.contains("0.31"));
        // Both -> both sentences concatenated (non-contradictory by construction).
        let both = default_instruction(true, Some(("compact", "0.31"))).unwrap();
        assert!(both.contains("code examples") && both.contains("0.31"));
    }

    #[test]
    fn known_reason_passes_closed_set_and_drops_others() {
        // Every documented server reason maps to itself (interned to 'static).
        for r in RERANK_REASONS {
            assert_eq!(known_reason(r), Some(*r));
        }
        // Anything outside the closed set is dropped — never echoed into an Event.
        assert_eq!(known_reason(""), None);
        assert_eq!(known_reason("applied"), None);
        assert_eq!(known_reason("Not_Requested"), None); // case-sensitive
        assert_eq!(known_reason("rate limited: token=eyJhbGci"), None);
    }

    #[test]
    fn compose_appends_instruction_to_query() {
        assert_eq!(compose_rerank_query("how do circuits work", None), "how do circuits work");
        assert_eq!(compose_rerank_query("q", Some("   ")), "q");
        let composed = compose_rerank_query("q", Some("Prioritize code."));
        assert_eq!(composed, "q\nInstructions: Prioritize code.");
    }
}
225}