Skip to main content

rerank_blend/
lib.rs

1//! # rerank-blend
2//!
3//! Blend N reranker score streams into one final ranking.
4//!
5//! Two strategies:
6//!
7//! - [`blend_weighted`] — min-max-normalize each stream, multiply by
8//!   its weight, sum, return sorted descending.
9//! - [`blend_rrf`] — Reciprocal Rank Fusion (Cormack et al. 2009):
10//!   each stream contributes `1 / (k + rank)`, default `k = 60`.
11//!   Score-free; works across heterogeneous score scales.
12//!
13//! ## Example
14//!
15//! ```
16//! use rerank_blend::{blend_weighted, blend_rrf, RrfOpts};
17//!
18//! let dense = vec![("a", 0.91), ("b", 0.88), ("c", 0.55)];
19//! let bm25  = vec![("a", 12.3), ("c", 8.1), ("b", 3.4)];
20//!
21//! let blended = blend_weighted(&[(&dense, 0.7), (&bm25, 0.3)]);
22//! assert_eq!(blended[0].0, "a");
23//!
24//! let rrf = blend_rrf(&[&dense, &bm25], RrfOpts::default());
25//! assert_eq!(rrf[0].0, "a");
26//! ```
27
28#![deny(missing_docs)]
29
30use std::collections::HashMap;
31use std::hash::Hash;
32
33/// Options for Reciprocal Rank Fusion.
34#[derive(Debug, Clone, Copy)]
35pub struct RrfOpts {
36    /// The `k` smoothing constant. Cormack's paper recommends 60.
37    pub k: f32,
38}
39
40impl Default for RrfOpts {
41    fn default() -> Self {
42        Self { k: 60.0 }
43    }
44}
45
46/// Weighted blend with per-stream min-max normalization. Streams whose
47/// max == min contribute zero to ensure the normalization is well-defined.
48///
49/// Input: a slice of `(stream, weight)` pairs. Each stream is a slice
50/// of `(id, score)` pre-sorted in any order.
51///
52/// Output: sorted descending by the blended score.
53pub fn blend_weighted<'a, K: Eq + Hash + Clone + 'a>(
54    streams: &[(&'a [(K, f32)], f32)],
55) -> Vec<(K, f32)> {
56    let mut totals: HashMap<K, f32> = HashMap::new();
57    for (stream, weight) in streams {
58        if stream.is_empty() {
59            continue;
60        }
61        let (mut min, mut max) = (f32::INFINITY, f32::NEG_INFINITY);
62        for (_, s) in stream.iter() {
63            if *s < min {
64                min = *s;
65            }
66            if *s > max {
67                max = *s;
68            }
69        }
70        let range = max - min;
71        if range == 0.0 {
72            continue;
73        }
74        for (k, s) in stream.iter() {
75            let norm = (s - min) / range;
76            *totals.entry(k.clone()).or_insert(0.0) += norm * weight;
77        }
78    }
79    let mut out: Vec<(K, f32)> = totals.into_iter().collect();
80    out.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
81    out
82}
83
84/// Reciprocal Rank Fusion. Streams must be pre-sorted by rank (best first).
85pub fn blend_rrf<'a, K: Eq + Hash + Clone + 'a>(
86    streams: &[&'a [(K, f32)]],
87    opts: RrfOpts,
88) -> Vec<(K, f32)> {
89    let mut totals: HashMap<K, f32> = HashMap::new();
90    for stream in streams {
91        for (rank, (k, _)) in stream.iter().enumerate() {
92            let contribution = 1.0 / (opts.k + rank as f32 + 1.0);
93            *totals.entry(k.clone()).or_insert(0.0) += contribution;
94        }
95    }
96    let mut out: Vec<(K, f32)> = totals.into_iter().collect();
97    out.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
98    out
99}