use std::cmp::Ordering;
#[derive(Copy, Clone, Debug)]
struct HalfSampleInterval {
pub start_index: usize,
pub start_sample: f64,
pub bins: usize,
pub cume_weight: f64,
pub width: f64
}
impl HalfSampleInterval {
pub fn new(index: usize, sample: f64, weight: f64) -> Self {
HalfSampleInterval {
start_index: index,
start_sample: sample,
bins: 1,
cume_weight: weight,
width: 0.0
}
}
pub fn reset(&mut self, index: usize, sample: f64, weight: f64) {
self.start_index = index;
self.start_sample = sample;
self.bins = 1;
self.cume_weight = weight;
self.width = 0.0;
}
pub fn empty() -> Self {
Self::new(0, 0.0, 0.0)
}
pub fn is_empty(&self) -> bool {
self.cume_weight == 0.0
}
pub fn add(&mut self, sample: f64, weight: f64) {
self.bins += 1;
self.cume_weight += weight;
self.width = sample - self.start_sample;
}
pub fn is_underweight(&self, minimum_weight: f64) -> bool {
self.cume_weight < minimum_weight
}
pub fn is_denser_than(&self, other: &Self, minimum_weight: f64) -> bool {
if other.is_empty() && !self.is_empty() { true }
else if self.is_underweight(minimum_weight) { false }
else if other.is_underweight(minimum_weight) { true }
else if self.width < other.width { true }
else if self.width > other.width { false }
else { self.cume_weight > other.cume_weight }
}
}
pub fn half_sample_mode(samples: &[f64], weights: &[f64], total_weight: f64) -> Option<f64> {
if total_weight == 0.0 { None }
else {
let count = samples.len();
if count < 5 {
let mut max_weight = 0.0;
let mut samples_with_max_weight: Vec<f64> = Vec::new();
for (&sample, &weight) in samples.iter().zip(weights) {
if weight > max_weight {
max_weight = weight;
samples_with_max_weight = vec![sample];
}
else if weight == max_weight {
samples_with_max_weight.push(sample);
}
}
return Some(samples_with_max_weight.iter().sum::<f64>() / samples_with_max_weight.len() as f64);
}
let mut cume_target = total_weight / 2.0;
let mut start_index = 0;
let mut limit_index = samples.len();
let mut densest = HalfSampleInterval::empty();
while (limit_index - start_index) >= 2 {
densest = HalfSampleInterval::empty();
for interval_start in start_index..limit_index {
let mut current_interval = HalfSampleInterval::empty();
let mut made_weight = false;
for index in interval_start..limit_index {
if current_interval.is_empty() {
current_interval.reset(index, samples[index], weights[index]);
}
else {
current_interval.add(samples[index], weights[index]);
}
if current_interval.is_denser_than(&densest, cume_target) {
densest = current_interval;
}
if !current_interval.is_underweight(cume_target) {
made_weight = true;
break;
}
}
if !made_weight {
break;
}
}
start_index = densest.start_index;
limit_index = start_index + densest.bins;
cume_target = cume_target / 2.0;
}
match densest.bins {
0 => None,
1 => Some(samples[densest.start_index]),
2 => {
let i = densest.start_index;
let weight_sum = weights[i] + weights[i+1];
if weight_sum <= 0.0 {
None
}
else {
let weighted_mean = (samples[i] * weights[i] + samples[i+1] * weights[i+1]) / weight_sum;
Some(weighted_mean)
}
},
3 => {
let i = densest.start_index;
let weight_sum = weights[i] + weights[i+1] + weights[i+2];
if weight_sum <= 0.0 {
None
}
else {
let weighted_mean = (
samples[i] * weights[i]
+ samples[i+1] * weights[i+1]
+ samples[i+2] * weights[i+2])
/ weight_sum;
Some(weighted_mean)
}
},
_ => None
}
}
}
#[derive(Copy, Clone, Debug)]
pub struct HalfSampleModeCache {
mode: Option<f64>,
latest_sample: f64,
bins: usize,
total_weight: f64,
neighborhood: usize,
allowed_bin_change: usize,
allowed_weight_change_fraction: f64,
allowed_outside_neighborhood: usize,
outside_neighborhood_counter: usize,
cache_hits: usize
}
impl HalfSampleModeCache {
pub fn new(neighborhood: usize, allowed_bin_change: usize, allowed_weight_change_fraction: f64) -> Self {
HalfSampleModeCache {
mode: None,
latest_sample: f64::NAN,
bins: 0,
total_weight: 0.0,
neighborhood: neighborhood,
allowed_bin_change: allowed_bin_change,
allowed_weight_change_fraction: allowed_weight_change_fraction,
allowed_outside_neighborhood: 12,
outside_neighborhood_counter: 0,
cache_hits: 0
}
}
pub fn new_default() -> Self {
HalfSampleModeCache {
mode: None,
latest_sample: f64::NAN,
bins: 0,
total_weight: 0.0,
neighborhood: 70,
allowed_bin_change: 10,
allowed_weight_change_fraction: 0.01,
allowed_outside_neighborhood: 12,
outside_neighborhood_counter: 0,
cache_hits: 0
}
}
pub fn record(&mut self, sample: f64) {
self.latest_sample = sample;
}
pub fn hits(&self) -> usize { self.cache_hits }
fn full_recalc(&mut self, samples: &[f64], weights: &[f64], total_weight: f64) -> Option<f64> {
self.bins = samples.len();
self.total_weight = total_weight;
self.mode = half_sample_mode(samples, weights, total_weight);
self.outside_neighborhood_counter = 0;
self.mode
}
#[inline]
pub fn total_cmp(this: &f64, other: &f64) -> Ordering {
let mut left = this.to_bits() as i64;
let mut right = other.to_bits() as i64;
left ^= (((left >> 63) as u64) >> 1) as i64;
right ^= (((right >> 63) as u64) >> 1) as i64;
left.cmp(&right)
}
fn partial_recalc(&mut self, prev_mode_position: usize, samples: &[f64], weights: &[f64], total_weight: f64) -> Option<f64> {
self.cache_hits += 1;
self.bins = samples.len();
self.total_weight = total_weight;
let half_neighborhood = self.neighborhood / 2;
let start_index = if prev_mode_position < half_neighborhood { 0 }
else { prev_mode_position - half_neighborhood };
let stop_index = if prev_mode_position + half_neighborhood >= samples.len() { samples.len() }
else { prev_mode_position + half_neighborhood };
self.mode = half_sample_mode(&samples[start_index..stop_index], &weights[start_index..stop_index], total_weight);
self.mode
}
pub fn hsm(&mut self, samples: &[f64], weights: &[f64], total_weight: f64) -> Option<f64> {
let new_bins = samples.len();
let do_full_recalc = self.mode.is_none()
|| new_bins < 64 || new_bins - self.bins > self.allowed_bin_change
|| (total_weight - self.total_weight) / self.total_weight > self.allowed_weight_change_fraction
;
let prev_mode = self.mode.unwrap_or_default();
if !do_full_recalc && prev_mode.is_finite() {
let prev_mode_position = match samples.binary_search_by(|probe| Self::total_cmp(probe, &prev_mode)) {
Result::Ok(pos) => pos,
Err(pos) => pos
};
let newest_sample_position = match samples.binary_search_by(|probe| Self::total_cmp(probe, &self.latest_sample)) {
Result::Ok(pos) => pos,
Err(pos) => pos
};
let spread = usize::max(prev_mode_position, newest_sample_position) - usize::min(prev_mode_position, newest_sample_position);
let third_neighborhood = self.neighborhood / 3;
let half_neighborhood = self.neighborhood / 2;
if spread > half_neighborhood && self.outside_neighborhood_counter < self.allowed_outside_neighborhood {
self.outside_neighborhood_counter += 1;
self.cache_hits += 1;
self.mode
}
else if spread > third_neighborhood {
if self.outside_neighborhood_counter >= self.allowed_outside_neighborhood {
self.full_recalc(samples, weights, total_weight)
}
else {
self.outside_neighborhood_counter += 1;
self.partial_recalc(prev_mode_position, samples, weights, total_weight)
}
}
else {
self.partial_recalc(prev_mode_position, samples, weights, total_weight)
}
}
else {
self.full_recalc(samples, weights, total_weight)
}
}
pub fn clear(&mut self) {
self.mode = None;
self.latest_sample = f64::NAN;
self.bins = 0;
self.total_weight = f64::NAN;
self.cache_hits = 0;
self.outside_neighborhood_counter = 0;
}
}
#[cfg(test)]
mod tests {
use std::time::{Duration, Instant};
use super::*;
#[test]
fn hsm_cache_performance() {
let no_cache_duration = simulate_convergence(1200, false).as_secs_f64();
let with_cache_duration = simulate_convergence(1200, true).as_secs_f64();
let speedup = no_cache_duration / with_cache_duration;
println!("
With cache: {:.2} sec
Without Cache: {:.2} sec
Speedup: {:.3}x
", with_cache_duration, no_cache_duration, speedup);
assert!(speedup > 8.0);
}
fn golden_ratio_sequence(n: usize) -> f64 {
let phi = (1.0 + 5.0_f64.sqrt()) / 2.0;
(n as f64 * phi) % 1.0
}
fn insert_sorted(value: f64, samples: &mut Vec<f64>) {
let pos = match samples.binary_search_by(|probe| HalfSampleModeCache::total_cmp(probe, &value)) {
Result::Ok(pos) => pos,
Err(pos) => pos
};
samples.insert(pos, value);
}
fn simulate_convergence(iterations: usize, use_cache: bool) -> Duration {
let true_value = 100.0;
let start_value = 0.0;
let start_error = start_value - true_value;
let mut values: Vec<f64> = Vec::new();
let mut weights: Vec<f64> = Vec::new();
let mut weight_sum = 0.0;
let mut cache = HalfSampleModeCache::new_default();
let start_time = Instant::now();
for i in 1..=iterations {
let error = start_error / (i as f64).sqrt();
let sign = if golden_ratio_sequence(i) < 0.3 { -1.0 } else { 1.0 };
let next_value = true_value + sign * error;
insert_sorted(next_value, &mut values);
weights.push(1.0);
weight_sum += 1.0;
if use_cache {
cache.record(next_value);
let _mode = cache.hsm(&values, &weights, weight_sum);
}
else {
let _mode = half_sample_mode(&values, &weights, weight_sum);
}
}
if use_cache {
println!(" Cache hits: {}. Cache misses: {}", cache.hits(), iterations - cache.hits());
}
start_time.elapsed()
}
}