use std::f64::INFINITY;
use num_traits::{Float, NumAssign};
use serde::{Deserialize, Serialize};
pub mod t_scores;
use super::utils;
/// Sums a slice of floating-point values.
///
/// Returns the additive identity (zero) for an empty slice.
fn sum<T: Float>(points: &[T]) -> T {
    // `T::zero()` is the idiomatic additive identity for `Float` types,
    // replacing the fallible `T::from(0.0).unwrap()` round-trip.
    points.iter().fold(T::zero(), |total, point| total + *point)
}
/// Arithmetic mean of `points`.
///
/// For an empty slice this divides zero by zero, which for IEEE float
/// types yields NaN.
fn sample_mean<T: Float>(points: &[T]) -> T {
    let count = T::from(points.len()).unwrap();
    sum(points) / count
}
/// Variance of `points` about the supplied `mean`.
///
/// Note: divides by N (population variance), not N - 1; downstream
/// confidence-interval calculations rely on this convention.
fn sample_variance<T: Float>(points: &[T], mean: &T) -> T {
    let sum_squared_deviations = points
        .iter()
        .map(|point| (*point - *mean).powi(2))
        .fold(T::from(0.0).unwrap(), |acc, sq| acc + sq);
    sum_squared_deviations / T::from(points.len()).unwrap()
}
/// A two-sided confidence interval `[lower, upper]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceInterval<T: Float> {
    lower: T,
    upper: T,
}

impl<T: Float> ConfidenceInterval<T> {
    /// Lower bound of the interval.
    pub fn lower(&self) -> T {
        self.lower
    }

    /// Upper bound of the interval.
    pub fn upper(&self) -> T {
        self.upper
    }

    /// Half the distance between the bounds (the "plus-or-minus" term).
    pub fn half_width(&self) -> T {
        let width = self.upper - self.lower;
        width / T::from(2.0).unwrap()
    }
}
/// A sample of independent, identically distributed observations, with
/// the mean and (population) variance cached at construction time.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct IndependentSample<T> {
    points: Vec<T>,
    mean: T,
    variance: T,
}

impl<T: Float> IndependentSample<T> {
    /// Builds an `IndependentSample` from raw observations, precomputing
    /// the sample mean and variance.
    pub fn post(points: Vec<T>) -> IndependentSample<T> {
        let mean = sample_mean(&points);
        let variance = sample_variance(&points, &mean);
        IndependentSample {
            points,
            mean,
            variance,
        }
    }

    /// Confidence interval for the mean at significance `alpha`, using
    /// the Student's t distribution with `n - 1` degrees of freedom.
    ///
    /// A single-observation sample yields a degenerate interval at the
    /// mean (no variance information is available). An empty sample
    /// underflows `n - 1` and panics in debug builds.
    pub fn confidence_interval_mean(&self, alpha: T) -> ConfidenceInterval<T> {
        if self.points.len() == 1 {
            return ConfidenceInterval {
                lower: self.mean,
                upper: self.mean,
            };
        }
        // Compute t * s / sqrt(n) once instead of duplicating the whole
        // expression for each bound (the original repeated it verbatim).
        let half_width = t_scores::t_score(alpha, self.points.len() - 1)
            * self.variance.sqrt()
            / T::from(self.points.len()).unwrap().sqrt();
        ConfidenceInterval {
            lower: self.mean - half_width,
            upper: self.mean + half_width,
        }
    }

    /// Point estimate of the mean (the cached sample mean).
    pub fn point_estimate_mean(&self) -> T {
        self.mean
    }

    /// Cached sample variance (population convention; see `sample_variance`).
    pub fn variance(&self) -> T {
        self.variance
    }
}
/// Output from replicated terminating simulations: the raw time series
/// of every replication, plus fields reserved for cross-replication
/// statistics.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct TerminatingSimulationOutput<T> {
    time_series_replications: Vec<Vec<T>>,
    replication_means: Vec<T>,
    replications_mean: Option<T>,
    replications_variance: Option<T>,
}

impl<T: Float> TerminatingSimulationOutput<T> {
    /// Creates the output container, seeded with a first replication.
    pub fn post(time_series: Vec<T>) -> TerminatingSimulationOutput<T> {
        let time_series_replications = vec![time_series];
        TerminatingSimulationOutput {
            time_series_replications,
            replication_means: vec![],
            replications_mean: None,
            replications_variance: None,
        }
    }

    /// Appends one more replication's time series.
    pub fn put_time_series(&mut self, time_series: Vec<T>) {
        self.time_series_replications.push(time_series);
    }
}
/// Output from a single, long steady-state simulation run.
///
/// Warm-up deletion (via the MSER statistic in `set_to_fixed_budget`) and
/// batch-means statistics are computed lazily and cached in the optional
/// fields below; `None`/empty means "not yet computed".
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct SteadyStateOutput<T> {
time_series: Vec<T>,
// Count of leading observations discarded as initialization bias.
deletion_point: Option<usize>,
batch_size: Option<usize>,
batch_count: Option<usize>,
batch_means: Vec<T>,
batches_mean: Option<T>,
batches_variance: Option<T>,
}
impl<T: Float + NumAssign> SteadyStateOutput<T> {
    /// Wraps a raw steady-state time series; all statistics are computed
    /// lazily on first request.
    pub fn post(time_series: Vec<T>) -> SteadyStateOutput<T> {
        SteadyStateOutput {
            time_series,
            deletion_point: None,
            batch_size: None,
            batch_count: None,
            batch_means: Vec::new(),
            batches_mean: None,
            batches_variance: None,
        }
    }

    /// Chooses the initialization-bias deletion point by minimizing the
    /// MSER statistic, then fixes the batch count and batch size for the
    /// batch-means method.
    ///
    /// Panics (in debug builds, via `usize` underflow) when the time
    /// series holds fewer than two observations.
    fn set_to_fixed_budget(&mut self) {
        let mut s = T::zero(); // running sum of the retained tail
        let mut q = T::zero(); // running sum of squares of the retained tail
        let mut d = self.time_series.len() - 2;
        let mut mser = vec![T::zero(); self.time_series.len() - 1];
        loop {
            s += self.time_series[d + 1];
            q += self.time_series[d + 1].powi(2);
            // MSER(d) = (Σx² - (Σx)²/m) / m² — the sum of squared
            // deviations of the retained observations, scaled by 1/m².
            // BUGFIX: the original expression lacked parentheses around
            // the numerator and so computed q - s²/m³ instead.
            // NOTE(review): `s`/`q` accumulate points d+1..n-1 (one fewer
            // than the m = n - d the divisor assumes) — confirm the
            // intended MSER indexing against the reference formulation.
            let m = T::from(self.time_series.len() - d).unwrap();
            mser[d] = (q - s.powi(2) / m) / m.powi(2);
            if d == 0 {
                // Search only the first half of the candidate deletion
                // points for the minimum; MSER values near the end of the
                // series are based on very few observations.
                let min_mser = (0..(self.time_series.len() - 1) / 2)
                    .fold(T::infinity(), |min_mser, mser_index| {
                        min_mser.min(mser[mser_index])
                    });
                self.deletion_point = mser.iter().position(|mser_value| *mser_value == min_mser);
                break;
            } else {
                d -= 1;
            }
        }
        // Batch-means setup: roughly sqrt(remaining points) batches,
        // capped at 30 — presumably a rule-of-thumb cap; confirm source.
        self.batch_count = Some(usize::min(
            utils::usize_sqrt(self.time_series.len() - self.deletion_point.unwrap()),
            30,
        ));
        self.batch_size = Some(
            (self.time_series.len() - self.deletion_point.unwrap()) / self.batch_count.unwrap(),
        );
        // Push the deletion point forward so the retained observations
        // divide evenly into batch_count * batch_size.
        self.deletion_point =
            Some(self.time_series.len() - self.batch_count.unwrap() * self.batch_size.unwrap());
    }

    /// Computes the batch means and their summary statistics, running the
    /// fixed-budget setup first if it has not happened yet.
    fn calculate_batch_statistics(&mut self) {
        if self.batch_count.is_none() {
            self.set_to_fixed_budget();
        }
        let deletion_point = self.deletion_point.unwrap();
        let batch_size = self.batch_size.unwrap();
        self.batch_means = (0..self.batch_count.unwrap())
            .map(|batch_index| {
                let batch_start_index = deletion_point + batch_size * batch_index;
                let batch_end_index = deletion_point + batch_size * (batch_index + 1);
                // Slice directly into the series instead of collecting a
                // temporary Vec per batch (saves one allocation per batch).
                sample_mean(&self.time_series[batch_start_index..batch_end_index])
            })
            .collect();
        self.batches_mean = Some(sample_mean(&self.batch_means));
        self.batches_variance = Some(sample_variance(
            &self.batch_means,
            &self.batches_mean.unwrap(),
        ));
    }

    /// Confidence interval for the steady-state mean at significance
    /// `alpha`, built from the batch means using the Student's t
    /// distribution with `batch_count - 1` degrees of freedom.
    pub fn confidence_interval_mean(&mut self, alpha: T) -> ConfidenceInterval<T> {
        if self.batches_mean.is_none() {
            self.calculate_batch_statistics();
        }
        let batch_count = self.batch_count.unwrap();
        let batches_mean = self.batches_mean.unwrap();
        // A single batch provides no variance information; return a
        // degenerate interval at the mean.
        if batch_count == 1 {
            return ConfidenceInterval {
                lower: batches_mean,
                upper: batches_mean,
            };
        }
        // Compute t * s / sqrt(k) once for both bounds rather than
        // duplicating the expression.
        let half_width = t_scores::t_score(alpha, batch_count - 1)
            * self.batches_variance.unwrap().sqrt()
            / T::from(batch_count).unwrap().sqrt();
        ConfidenceInterval {
            lower: batches_mean - half_width,
            upper: batches_mean + half_width,
        }
    }

    /// Point estimate of the steady-state mean (the mean of the batch
    /// means), computing batch statistics first if needed.
    pub fn point_estimate_mean(&mut self) -> T {
        if self.batches_mean.is_none() {
            self.calculate_batch_statistics();
        }
        self.batches_mean.unwrap()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Tolerance for floating-point comparisons.
    const EPSILON: f64 = 1.0e-12;

    #[test]
    fn confidence_interval_mean() {
        let sample = IndependentSample::post(vec![
            1.02, 0.73, 3.20, 0.23, 1.76, 0.47, 1.89, 1.45, 0.44, 0.23,
        ]);
        let interval = sample.confidence_interval_mean(0.1);
        assert!((interval.lower - 0.7492630635369267).abs() < EPSILON);
        assert!((interval.upper - 1.534736936463073).abs() < EPSILON);
    }
}