rerank_blend/lib.rs
1//! # rerank-blend
2//!
3//! Blend N reranker score streams into one final ranking.
4//!
5//! Two strategies:
6//!
7//! - [`blend_weighted`] — min-max-normalize each stream, multiply by
8//! its weight, sum, return sorted descending.
9//! - [`blend_rrf`] — Reciprocal Rank Fusion (Cormack et al. 2009):
10//! each stream contributes `1 / (k + rank)`, default `k = 60`.
11//! Score-free; works across heterogeneous score scales.
12//!
13//! ## Example
14//!
15//! ```
16//! use rerank_blend::{blend_weighted, blend_rrf, RrfOpts};
17//!
18//! let dense = vec![("a", 0.91), ("b", 0.88), ("c", 0.55)];
19//! let bm25 = vec![("a", 12.3), ("c", 8.1), ("b", 3.4)];
20//!
21//! let blended = blend_weighted(&[(&dense, 0.7), (&bm25, 0.3)]);
22//! assert_eq!(blended[0].0, "a");
23//!
24//! let rrf = blend_rrf(&[&dense, &bm25], RrfOpts::default());
25//! assert_eq!(rrf[0].0, "a");
26//! ```
27
28#![deny(missing_docs)]
29
30use std::collections::HashMap;
31use std::hash::Hash;
32
33/// Options for Reciprocal Rank Fusion.
34#[derive(Debug, Clone, Copy)]
35pub struct RrfOpts {
36 /// The `k` smoothing constant. Cormack's paper recommends 60.
37 pub k: f32,
38}
39
40impl Default for RrfOpts {
41 fn default() -> Self {
42 Self { k: 60.0 }
43 }
44}
45
46/// Weighted blend with per-stream min-max normalization. Streams whose
47/// max == min contribute zero to ensure the normalization is well-defined.
48///
49/// Input: a slice of `(stream, weight)` pairs. Each stream is a slice
50/// of `(id, score)` pre-sorted in any order.
51///
52/// Output: sorted descending by the blended score.
53pub fn blend_weighted<'a, K: Eq + Hash + Clone + 'a>(
54 streams: &[(&'a [(K, f32)], f32)],
55) -> Vec<(K, f32)> {
56 let mut totals: HashMap<K, f32> = HashMap::new();
57 for (stream, weight) in streams {
58 if stream.is_empty() {
59 continue;
60 }
61 let (mut min, mut max) = (f32::INFINITY, f32::NEG_INFINITY);
62 for (_, s) in stream.iter() {
63 if *s < min {
64 min = *s;
65 }
66 if *s > max {
67 max = *s;
68 }
69 }
70 let range = max - min;
71 if range == 0.0 {
72 continue;
73 }
74 for (k, s) in stream.iter() {
75 let norm = (s - min) / range;
76 *totals.entry(k.clone()).or_insert(0.0) += norm * weight;
77 }
78 }
79 let mut out: Vec<(K, f32)> = totals.into_iter().collect();
80 out.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
81 out
82}
83
84/// Reciprocal Rank Fusion. Streams must be pre-sorted by rank (best first).
85pub fn blend_rrf<'a, K: Eq + Hash + Clone + 'a>(
86 streams: &[&'a [(K, f32)]],
87 opts: RrfOpts,
88) -> Vec<(K, f32)> {
89 let mut totals: HashMap<K, f32> = HashMap::new();
90 for stream in streams {
91 for (rank, (k, _)) in stream.iter().enumerate() {
92 let contribution = 1.0 / (opts.k + rank as f32 + 1.0);
93 *totals.entry(k.clone()).or_insert(0.0) += contribution;
94 }
95 }
96 let mut out: Vec<(K, f32)> = totals.into_iter().collect();
97 out.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
98 out
99}