use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::fmt;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Impact {
pub freq: i32,
pub norm: i64,
}
impl Ord for Impact {
fn cmp(&self, other: &Self) -> Ordering {
self.freq
.cmp(&other.freq)
.then_with(|| (other.norm as u64).cmp(&(self.norm as u64)))
}
}
impl PartialOrd for Impact {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
pub struct CompetitiveImpactAccumulator {
max_freqs: [i32; 256],
other_freq_norm_pairs: BTreeSet<Impact>,
}
impl Default for CompetitiveImpactAccumulator {
fn default() -> Self {
Self::new()
}
}
impl CompetitiveImpactAccumulator {
pub fn new() -> Self {
Self {
max_freqs: [0; 256],
other_freq_norm_pairs: BTreeSet::new(),
}
}
pub fn clear(&mut self) {
self.max_freqs.fill(0);
self.other_freq_norm_pairs.clear();
}
pub fn add(&mut self, freq: i32, norm: i64) {
if norm >= i8::MIN as i64 && norm <= i8::MAX as i64 {
let index = norm as i8 as u8 as usize;
self.max_freqs[index] = self.max_freqs[index].max(freq);
} else {
let entry = Impact { freq, norm };
add_to_set(entry, &mut self.other_freq_norm_pairs);
}
}
pub fn add_all(&mut self, other: &Self) {
for i in 0..self.max_freqs.len() {
self.max_freqs[i] = self.max_freqs[i].max(other.max_freqs[i]);
}
for &entry in &other.other_freq_norm_pairs {
add_to_set(entry, &mut self.other_freq_norm_pairs);
}
}
pub fn get_competitive_freq_norm_pairs(&self) -> Vec<Impact> {
let mut impacts = Vec::new();
let mut max_freq_for_lower_norms = 0;
for i in 0..self.max_freqs.len() {
let max_freq = self.max_freqs[i];
if max_freq > max_freq_for_lower_norms {
let norm = i as u8 as i8 as i64;
impacts.push(Impact {
freq: max_freq,
norm,
});
max_freq_for_lower_norms = max_freq;
}
}
if self.other_freq_norm_pairs.is_empty() {
return impacts;
}
let mut freq_norm_pairs = self.other_freq_norm_pairs.clone();
for impact in impacts {
add_to_set(impact, &mut freq_norm_pairs);
}
freq_norm_pairs.into_iter().collect()
}
}
pub const NO_BOOST: i64 = 1;
pub trait NormsLookup: fmt::Debug {
fn get(&self, doc_id: i32) -> i64;
}
#[derive(Debug)]
pub struct BufferedNormsLookup<'a> {
norms: &'a [i64],
norms_docs: &'a [i32],
}
impl<'a> BufferedNormsLookup<'a> {
pub fn new(norms: &'a [i64], norms_docs: &'a [i32]) -> Self {
Self { norms, norms_docs }
}
pub fn no_norms() -> Self {
Self {
norms: &[],
norms_docs: &[],
}
}
}
impl NormsLookup for BufferedNormsLookup<'_> {
fn get(&self, doc_id: i32) -> i64 {
match self.norms_docs.binary_search(&doc_id) {
Ok(idx) => self.norms[idx],
Err(_) => NO_BOOST,
}
}
}
fn add_to_set(new_entry: Impact, set: &mut BTreeSet<Impact>) {
let ceiling = set.range(new_entry..).next().copied();
if let Some(next) = ceiling
&& (next.norm as u64) <= (new_entry.norm as u64)
{
return;
}
set.insert(new_entry);
let to_remove: Vec<Impact> = set
.range(..new_entry)
.rev()
.copied()
.take_while(|e| (e.norm as u64) >= (new_entry.norm as u64))
.collect();
for e in to_remove {
set.remove(&e);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_impact_ord_ascending_freq() {
let a = Impact { freq: 3, norm: 5 };
let b = Impact { freq: 7, norm: 5 };
assert_lt!(a, b);
}
#[test]
fn test_impact_ord_descending_unsigned_norm_for_equal_freq() {
let a = Impact { freq: 5, norm: 10 };
let b = Impact { freq: 5, norm: 5 };
assert_lt!(a, b); }
fn impacts_to_pairs(impacts: &[Impact]) -> Vec<(i32, i64)> {
impacts.iter().map(|i| (i.freq, i.norm)).collect()
}
#[test]
fn test_basics() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(3, 5);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5)]
);
acc.add(6, 11);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5), (6, 11)]
);
acc.add(10, 13);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5), (6, 11), (10, 13)]
);
acc.add(1, 2);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(1, 2), (3, 5), (6, 11), (10, 13)]
);
acc.add(7, 9);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(1, 2), (3, 5), (7, 9), (10, 13)]
);
acc.add(8, 2);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(8, 2), (10, 13)]
);
}
#[test]
fn test_extreme_norms() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(3, 5);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5)]
);
acc.add(10, 10000);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5), (10, 10000)]
);
acc.add(5, 200);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5), (5, 200), (10, 10000)]
);
acc.add(20, -100);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5), (5, 200), (10, 10000), (20, -100)]
);
acc.add(30, -3);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(3, 5), (5, 200), (10, 10000), (20, -100), (30, -3)]
);
}
#[test]
fn test_add_all() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(3, 5);
let mut merged = CompetitiveImpactAccumulator::new();
merged.add_all(&acc);
assert_eq!(
acc.get_competitive_freq_norm_pairs(),
merged.get_competitive_freq_norm_pairs()
);
acc.add(10, 10000);
merged.clear();
merged.add_all(&acc);
assert_eq!(
acc.get_competitive_freq_norm_pairs(),
merged.get_competitive_freq_norm_pairs()
);
acc.add(5, 200);
merged.clear();
merged.add_all(&acc);
assert_eq!(
acc.get_competitive_freq_norm_pairs(),
merged.get_competitive_freq_norm_pairs()
);
acc.add(20, -100);
merged.clear();
merged.add_all(&acc);
assert_eq!(
acc.get_competitive_freq_norm_pairs(),
merged.get_competitive_freq_norm_pairs()
);
acc.add(30, -3);
merged.clear();
merged.add_all(&acc);
assert_eq!(
acc.get_competitive_freq_norm_pairs(),
merged.get_competitive_freq_norm_pairs()
);
}
#[test]
fn test_omit_freqs() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(1, 5);
acc.add(1, 7);
acc.add(1, 4);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(1, 4)]
);
}
#[test]
fn test_omit_norms() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(5, 1);
acc.add(7, 1);
acc.add(4, 1);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(7, 1)]
);
}
#[test]
fn test_single_impact() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(42, 10);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(42, 10)]
);
}
#[test]
fn test_clear() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(5, 10);
acc.add(10, 20);
acc.clear();
assert_is_empty!(acc.get_competitive_freq_norm_pairs());
}
#[test]
fn test_byte_range_boundaries() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(1, -128); acc.add(2, 127); acc.add(3, 128);
let pairs = impacts_to_pairs(&acc.get_competitive_freq_norm_pairs());
assert_eq!(pairs, [(2, 127), (3, 128)]);
}
#[test]
fn test_same_norm_max_freq_wins() {
let mut acc = CompetitiveImpactAccumulator::new();
acc.add(3, 10);
acc.add(7, 10);
acc.add(5, 10);
assert_eq!(
impacts_to_pairs(&acc.get_competitive_freq_norm_pairs()),
[(7, 10)]
);
}
}