#![allow(dead_code)]
#![allow(clippy::cast_precision_loss)]
use uuid::Uuid;
pub fn min_max_normalize(scores: &[(Uuid, f32)]) -> Vec<(Uuid, f32)> {
if scores.is_empty() {
return Vec::new();
}
let min = scores.iter().map(|(_, s)| *s).fold(f32::INFINITY, f32::min);
let max = scores
.iter()
.map(|(_, s)| *s)
.fold(f32::NEG_INFINITY, f32::max);
let range = max - min;
scores
.iter()
.map(|(id, s)| {
let normalised = if range < f32::EPSILON {
0.0
} else {
(s - min) / range
};
(*id, normalised)
})
.collect()
}
pub fn softmax_normalize(scores: &[(Uuid, f32)]) -> Vec<(Uuid, f32)> {
if scores.is_empty() {
return Vec::new();
}
let max_score = scores
.iter()
.map(|(_, s)| *s)
.fold(f32::NEG_INFINITY, f32::max);
let exps: Vec<f32> = scores.iter().map(|(_, s)| (s - max_score).exp()).collect();
let sum: f32 = exps.iter().sum();
scores
.iter()
.zip(exps)
.map(|((id, _), e)| (*id, if sum < f32::EPSILON { 0.0 } else { e / sum }))
.collect()
}
#[must_use]
pub fn top_k(scores: &[(Uuid, f32)], k: usize) -> Vec<(Uuid, f32)> {
let mut sorted = scores.to_vec();
sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
sorted.truncate(k);
sorted
}
#[must_use]
pub fn bottom_k(scores: &[(Uuid, f32)], k: usize) -> Vec<(Uuid, f32)> {
let mut sorted = scores.to_vec();
sorted.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
sorted.truncate(k);
sorted
}
#[must_use]
pub fn weighted_fusion(
list_a: &[(Uuid, f32)],
weight_a: f32,
list_b: &[(Uuid, f32)],
weight_b: f32,
) -> Vec<(Uuid, f32)> {
let mut combined: std::collections::HashMap<Uuid, f32> = std::collections::HashMap::new();
for (id, score) in list_a {
*combined.entry(*id).or_insert(0.0) += weight_a * score;
}
for (id, score) in list_b {
*combined.entry(*id).or_insert(0.0) += weight_b * score;
}
let mut result: Vec<(Uuid, f32)> = combined.into_iter().collect();
result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
result
}
#[must_use]
pub fn reciprocal_rank_fusion(lists: &[&[(Uuid, f32)]], k: f32) -> Vec<(Uuid, f32)> {
let mut scores: std::collections::HashMap<Uuid, f32> = std::collections::HashMap::new();
for list in lists {
for (rank, (id, _)) in list.iter().enumerate() {
*scores.entry(*id).or_insert(0.0) += 1.0 / (k + rank as f32 + 1.0);
}
}
let mut result: Vec<(Uuid, f32)> = scores.into_iter().collect();
result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
result
}
#[derive(Debug, Clone)]
pub struct RankItem {
pub id: Uuid,
pub score: f32,
pub features: Vec<f32>,
}
impl RankItem {
#[must_use]
pub fn new(id: Uuid, score: f32, features: Vec<f32>) -> Self {
Self {
id,
score,
features,
}
}
}
#[must_use]
pub fn mmr_rerank(items: &[RankItem], k: usize, lambda: f32) -> Vec<Uuid> {
if items.is_empty() || k == 0 {
return Vec::new();
}
let mut selected: Vec<usize> = Vec::with_capacity(k);
let mut remaining: Vec<usize> = (0..items.len()).collect();
while selected.len() < k && !remaining.is_empty() {
let best_idx = remaining
.iter()
.copied()
.max_by(|&i, &j| {
let mmr_i = mmr_score(items, i, &selected, lambda);
let mmr_j = mmr_score(items, j, &selected, lambda);
mmr_i
.partial_cmp(&mmr_j)
.unwrap_or(std::cmp::Ordering::Equal)
})
.expect("remaining is non-empty");
selected.push(best_idx);
remaining.retain(|&x| x != best_idx);
}
selected.iter().map(|&i| items[i].id).collect()
}
fn mmr_score(items: &[RankItem], candidate: usize, selected: &[usize], lambda: f32) -> f32 {
let rel = items[candidate].score;
if selected.is_empty() {
return lambda * rel;
}
let max_sim = selected
.iter()
.map(|&s| cosine_sim_vecs(&items[candidate].features, &items[s].features))
.fold(f32::NEG_INFINITY, f32::max);
lambda * rel - (1.0 - lambda) * max_sim
}
fn cosine_sim_vecs(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() || a.is_empty() {
return 0.0;
}
let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if na < f32::EPSILON || nb < f32::EPSILON {
0.0
} else {
dot / (na * nb)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn uid() -> Uuid {
Uuid::new_v4()
}
fn make_scores(vals: &[f32]) -> Vec<(Uuid, f32)> {
vals.iter().map(|&v| (uid(), v)).collect()
}
#[test]
fn test_min_max_basic() {
let scores = make_scores(&[0.0, 5.0, 10.0]);
let norm = min_max_normalize(&scores);
assert!((norm[0].1).abs() < 1e-5); assert!((norm[1].1 - 0.5).abs() < 1e-5); assert!((norm[2].1 - 1.0).abs() < 1e-5); }
#[test]
fn test_min_max_all_equal() {
let scores = make_scores(&[3.0, 3.0, 3.0]);
let norm = min_max_normalize(&scores);
assert!(norm.iter().all(|(_, s)| s.abs() < f32::EPSILON));
}
#[test]
fn test_min_max_empty() {
let norm = min_max_normalize(&[]);
assert!(norm.is_empty());
}
#[test]
fn test_softmax_sums_to_one() {
let scores = make_scores(&[1.0, 2.0, 3.0]);
let norm = softmax_normalize(&scores);
let total: f32 = norm.iter().map(|(_, s)| s).sum();
assert!((total - 1.0).abs() < 1e-5);
}
#[test]
fn test_softmax_higher_wins() {
let scores = make_scores(&[1.0, 10.0]);
let norm = softmax_normalize(&scores);
assert!(norm[1].1 > norm[0].1);
}
#[test]
fn test_softmax_empty() {
let norm = softmax_normalize(&[]);
assert!(norm.is_empty());
}
#[test]
fn test_top_k_basic() {
let scores = make_scores(&[3.0, 1.0, 4.0, 1.5, 2.0]);
let top = top_k(&scores, 2);
assert_eq!(top.len(), 2);
assert!((top[0].1 - 4.0).abs() < 1e-5);
assert!((top[1].1 - 3.0).abs() < 1e-5);
}
#[test]
fn test_top_k_larger_than_input() {
let scores = make_scores(&[1.0, 2.0]);
let top = top_k(&scores, 100);
assert_eq!(top.len(), 2);
}
#[test]
fn test_bottom_k_basic() {
let scores = make_scores(&[5.0, 1.0, 3.0]);
let bot = bottom_k(&scores, 2);
assert_eq!(bot.len(), 2);
assert!((bot[0].1 - 1.0).abs() < 1e-5);
}
#[test]
fn test_weighted_fusion_same_lists() {
let id = uid();
let list = vec![(id, 1.0)];
let fused = weighted_fusion(&list, 0.5, &list, 0.5);
assert_eq!(fused.len(), 1);
assert!((fused[0].1 - 1.0).abs() < 1e-5);
}
#[test]
fn test_weighted_fusion_disjoint() {
let a = vec![(uid(), 1.0)];
let b = vec![(uid(), 1.0)];
let fused = weighted_fusion(&a, 1.0, &b, 1.0);
assert_eq!(fused.len(), 2);
}
#[test]
fn test_rrf_single_list() {
let list = make_scores(&[10.0, 8.0, 6.0]);
let fused = reciprocal_rank_fusion(&[&list], 60.0);
assert_eq!(fused.len(), 3);
assert_eq!(fused[0].0, list[0].0);
}
#[test]
fn test_rrf_two_lists_agree() {
let ids: Vec<Uuid> = (0..3).map(|_| uid()).collect();
let list_a: Vec<(Uuid, f32)> = ids.iter().copied().zip([3.0, 2.0, 1.0]).collect();
let list_b: Vec<(Uuid, f32)> = ids.iter().copied().zip([3.0, 2.0, 1.0]).collect();
let fused = reciprocal_rank_fusion(&[&list_a, &list_b], 60.0);
assert_eq!(fused[0].0, ids[0]);
}
#[test]
fn test_mmr_rerank_pure_relevance() {
let items = vec![
RankItem::new(uid(), 0.9, vec![1.0, 0.0]),
RankItem::new(uid(), 0.7, vec![0.9, 0.1]),
RankItem::new(uid(), 0.5, vec![0.0, 1.0]),
];
let result = mmr_rerank(&items, 2, 1.0);
assert_eq!(result.len(), 2);
assert_eq!(result[0], items[0].id);
}
#[test]
fn test_mmr_rerank_empty() {
let result = mmr_rerank(&[], 5, 0.5);
assert!(result.is_empty());
}
#[test]
fn test_mmr_rerank_k_zero() {
let items = vec![RankItem::new(uid(), 1.0, vec![1.0])];
let result = mmr_rerank(&items, 0, 0.5);
assert!(result.is_empty());
}
}