use crate::RetrieveError;
pub struct CandidateEvolution {
current_size: usize,
expected_size: f32,
variance: f32,
}
impl CandidateEvolution {
pub fn new() -> Self {
Self {
current_size: 0,
expected_size: 0.0,
variance: 0.0,
}
}
pub fn update(&mut self, new_size: usize) {
let old_expected = self.expected_size;
self.current_size = new_size;
self.expected_size = 0.9 * old_expected + 0.1 * new_size as f32;
let diff = new_size as f32 - self.expected_size;
self.variance = 0.9 * self.variance + 0.1 * diff * diff;
}
#[allow(dead_code)]
pub fn expected_size(&self) -> f32 {
self.expected_size
}
#[allow(dead_code)]
pub fn variance(&self) -> f32 {
self.variance
}
}
pub fn prune_candidates_martingale(
candidates: &[(u32, f32)],
truncation_r: f32,
vectors: &[f32],
dimension: usize,
) -> Result<Vec<u32>, RetrieveError> {
if candidates.is_empty() {
return Ok(Vec::new());
}
let mut sorted: Vec<(u32, f32)> = candidates.to_vec();
sorted.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));
let mut pruned = Vec::new();
for (id, dist) in sorted {
if dist <= truncation_r {
pruned.push(id);
} else {
break;
}
}
let max_degree =
crate::sng::optimization::estimate_max_degree(vectors.len() / dimension, truncation_r);
if pruned.len() > max_degree {
pruned.truncate(max_degree);
}
Ok(pruned)
}