use std::collections::VecDeque;
/// Tunable parameters for the height predictor.
#[derive(Debug, Clone)]
pub struct PredictorConfig {
    /// Height reported as the point prediction while a category has no
    /// observations yet.
    pub default_height: u16,
    /// Weight of the prior mean in the posterior update, expressed as a
    /// number of pseudo-observations.
    pub prior_strength: f64,
    /// Prior mean of the height, used until data pulls the posterior away.
    pub prior_mean: f64,
    /// Prior variance; drives the cold-start margin (two standard deviations)
    /// and the reported posterior variance while fewer than two samples exist.
    pub prior_variance: f64,
    /// Target coverage of the prediction interval (e.g. `0.90`).
    pub coverage: f64,
    /// Maximum number of recent residuals retained per category.
    pub calibration_window: usize,
}

impl Default for PredictorConfig {
    /// Returns the stock configuration: one-row default height, a weak prior
    /// centred on 1.0, 90% coverage and a 200-sample calibration window.
    fn default() -> Self {
        PredictorConfig {
            calibration_window: 200,
            coverage: 0.90,
            prior_variance: 4.0,
            prior_mean: 1.0,
            prior_strength: 2.0,
            default_height: 1,
        }
    }
}
/// Streaming mean/variance accumulator (Welford's online algorithm).
#[derive(Debug, Clone)]
struct WelfordStats {
    /// Number of samples folded in so far.
    n: u64,
    /// Running mean of the samples.
    mean: f64,
    /// Running sum of squared deviations from the mean.
    m2: f64,
}

impl WelfordStats {
    /// Creates an accumulator that has seen no samples.
    fn new() -> Self {
        WelfordStats {
            n: 0,
            mean: 0.0,
            m2: 0.0,
        }
    }

    /// Folds one sample into the running statistics.
    fn update(&mut self, x: f64) {
        self.n += 1;
        // Deviation from the mean *before* it absorbs `x` ...
        let before = x - self.mean;
        self.mean += before / self.n as f64;
        // ... and from the mean *after*; their product is the m2 increment.
        let after = x - self.mean;
        self.m2 += before * after;
    }

    /// Returns the unbiased sample variance, or `f64::MAX` while fewer than
    /// two samples have been seen.
    fn variance(&self) -> f64 {
        match self.n.checked_sub(1) {
            Some(dof) if dof > 0 => self.m2 / dof as f64,
            _ => f64::MAX,
        }
    }
}
/// Learning state kept separately for each height category.
#[derive(Debug, Clone)]
struct CategoryState {
/// Running statistics of the heights observed for this category.
welford: WelfordStats,
/// Current posterior mean: the prior mean blended with the sample mean.
posterior_mean: f64,
/// Posterior strength: the prior strength plus the number of observations.
posterior_kappa: f64,
/// Absolute errors of past predictions, oldest first, capped at the
/// configured calibration window.
residuals: VecDeque<f64>,
}
/// A point prediction for a height together with its prediction interval.
///
/// Implements `PartialEq`/`Eq` so callers and tests can compare whole
/// predictions directly instead of field by field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HeightPrediction {
    /// Predicted height (point estimate).
    pub predicted: u16,
    /// Inclusive lower bound of the prediction interval.
    pub lower: u16,
    /// Inclusive upper bound of the prediction interval.
    pub upper: u16,
    /// Number of observations behind this prediction; `0` for a cold-start
    /// prediction.
    pub observations: u64,
}
/// Per-category height predictor that tracks how often observations fall
/// outside the predicted bounds.
#[derive(Debug, Clone)]
pub struct HeightPredictor {
/// Configuration supplied at construction time.
config: PredictorConfig,
/// Per-category state, indexed by category id.
categories: Vec<CategoryState>,
/// Number of calls to `observe` so far.
total_measurements: u64,
/// Number of observations that fell outside the bounds of a prediction
/// that was already backed by at least one observation.
total_violations: u64,
}
impl HeightPredictor {
    /// Creates a predictor with one pre-registered category (id `0`) whose
    /// posterior starts at the prior described by `config`.
    pub fn new(config: PredictorConfig) -> Self {
        let default_cat = Self::fresh_category(&config);
        Self {
            config,
            categories: vec![default_cat],
            total_measurements: 0,
            total_violations: 0,
        }
    }

    /// Builds an empty category whose posterior equals the configured prior.
    fn fresh_category(config: &PredictorConfig) -> CategoryState {
        CategoryState {
            welford: WelfordStats::new(),
            posterior_mean: config.prior_mean,
            posterior_kappa: config.prior_strength,
            residuals: VecDeque::new(),
        }
    }

    /// Interval half-width used when no residuals are available: two prior
    /// standard deviations, rounded up.
    fn prior_margin(&self) -> u16 {
        // Float-to-int `as` saturates and maps NaN (e.g. the square root of a
        // negative variance) to 0, so this never panics.
        (self.config.prior_variance.sqrt() * 2.0).ceil() as u16
    }

    /// Rounds a posterior mean to a height, never going below 1.
    fn point_estimate(mu: f64) -> u16 {
        mu.round().max(1.0) as u16
    }

    /// Registers a new, empty category and returns its id.
    ///
    /// Ids are handed out sequentially, starting at `1` (id `0` is created by
    /// [`HeightPredictor::new`]).
    pub fn register_category(&mut self) -> usize {
        let id = self.categories.len();
        let state = Self::fresh_category(&self.config);
        self.categories.push(state);
        id
    }

    /// Predicts the height for `category`.
    ///
    /// Unknown categories and categories without observations get the
    /// cold-start prediction (`default_height` with a prior-variance margin
    /// and `observations == 0`). Otherwise the rounded posterior mean is
    /// returned together with bounds derived from recent residuals.
    pub fn predict(&self, category: usize) -> HeightPrediction {
        let cat = match self.categories.get(category) {
            Some(cat) if cat.welford.n > 0 => cat,
            _ => return self.cold_prediction(),
        };
        let mu = cat.posterior_mean;
        let (lower, upper) = self.conformal_bounds(cat, mu);
        HeightPrediction {
            predicted: Self::point_estimate(mu),
            lower,
            upper,
            observations: cat.welford.n,
        }
    }

    /// Records a measured height for `category` and updates its posterior.
    ///
    /// Categories up to and including `category` are created on demand.
    /// Returns `true` when `actual_height` lay inside the bounds predicted
    /// *before* this observation was incorporated. A miss is counted as a
    /// violation only if that prediction was backed by at least one earlier
    /// observation; cold-start misses are not counted.
    pub fn observe(&mut self, category: usize, actual_height: u16) -> bool {
        while self.categories.len() <= category {
            self.register_category();
        }

        let prediction = self.predict(category);
        let within_bounds = (prediction.lower..=prediction.upper).contains(&actual_height);
        self.total_measurements += 1;
        if !within_bounds && prediction.observations > 0 {
            self.total_violations += 1;
        }

        let cat = &mut self.categories[category];
        let h = f64::from(actual_height);

        // The residual is measured against the mean that was in effect before
        // this sample, so the calibration scores stay out-of-sample.
        cat.residuals.push_back((cat.posterior_mean - h).abs());
        if cat.residuals.len() > self.config.calibration_window {
            cat.residuals.pop_front();
        }

        cat.welford.update(h);
        let n = cat.welford.n as f64;
        let kappa_0 = self.config.prior_strength;
        let mu_0 = self.config.prior_mean;
        cat.posterior_kappa = kappa_0 + n;
        cat.posterior_mean = (kappa_0 * mu_0 + n * cat.welford.mean) / cat.posterior_kappa;

        within_bounds
    }

    /// Prediction used when nothing has been observed: `default_height`
    /// plus/minus the prior margin, saturating at the `u16` limits.
    fn cold_prediction(&self) -> HeightPrediction {
        let d = self.config.default_height;
        let margin = self.prior_margin();
        HeightPrediction {
            predicted: d,
            lower: d.saturating_sub(margin),
            upper: d.saturating_add(margin),
            observations: 0,
        }
    }

    /// Computes `(lower, upper)` around `mu` from the category's residuals.
    ///
    /// With no residuals (only possible when the calibration window is 0) the
    /// prior margin around the rounded mean is used. Otherwise the half-width
    /// is the residual of rank `ceil(coverage * (n + 1))`, clamped to the
    /// largest residual; the lower bound never drops below 1.
    fn conformal_bounds(&self, cat: &CategoryState, mu: f64) -> (u16, u16) {
        if cat.residuals.is_empty() {
            let margin = self.prior_margin();
            let predicted = Self::point_estimate(mu);
            return (
                predicted.saturating_sub(margin),
                predicted.saturating_add(margin),
            );
        }

        let mut scores: Vec<f64> = cat.residuals.iter().copied().collect();
        let count = scores.len();

        let alpha = 1.0 - self.config.coverage;
        let rank = ((1.0 - alpha) * (count as f64 + 1.0)).ceil() as usize;
        let idx = rank.min(count).saturating_sub(1);

        // Only one order statistic is needed, so select it in O(n) instead of
        // sorting. `total_cmp` is a total order even when a residual is NaN
        // (possible with degenerate configs); NaN then ranks above every
        // number. A `partial_cmp`-based comparator is not a total order in
        // that case, which the standard library's sort/select routines are
        // allowed to reject by panicking.
        let (_, q, _) = scores.select_nth_unstable_by(idx, f64::total_cmp);
        let q = *q;

        let lower = (mu - q).max(1.0).floor() as u16;
        let upper = (mu + q).ceil().max(1.0) as u16;
        (lower, upper)
    }

    /// Returns the posterior mean for `category`, or the prior mean when the
    /// category does not exist.
    pub fn posterior_mean(&self, category: usize) -> f64 {
        self.categories
            .get(category)
            .map_or(self.config.prior_mean, |c| c.posterior_mean)
    }

    /// Returns the variance of the posterior mean for `category`.
    ///
    /// For a known category this is the data variance divided by the
    /// posterior strength, where the data variance is the prior variance
    /// until two observations exist and the sample variance afterwards. For
    /// an unknown category the undivided prior variance is returned.
    pub fn posterior_variance(&self, category: usize) -> f64 {
        self.categories
            .get(category)
            .map(|c| {
                let sigma_sq = if c.welford.n < 2 {
                    self.config.prior_variance
                } else {
                    c.welford.variance()
                };
                sigma_sq / c.posterior_kappa
            })
            .unwrap_or(self.config.prior_variance)
    }

    /// Total number of observations recorded across all categories.
    pub fn total_measurements(&self) -> u64 {
        self.total_measurements
    }

    /// Total number of counted bound violations across all categories.
    pub fn total_violations(&self) -> u64 {
        self.total_violations
    }

    /// Fraction of all measurements that were counted as violations; `0.0`
    /// before the first measurement.
    pub fn violation_rate(&self) -> f64 {
        if self.total_measurements == 0 {
            return 0.0;
        }
        self.total_violations as f64 / self.total_measurements as f64
    }

    /// Number of registered categories, including the default category.
    pub fn category_count(&self) -> usize {
        self.categories.len()
    }

    /// Number of observations recorded for `category`; `0` when the category
    /// does not exist.
    pub fn category_observations(&self, category: usize) -> u64 {
        self.categories.get(category).map_or(0, |c| c.welford.n)
    }
}
impl Default for HeightPredictor {
fn default() -> Self {
Self::new(PredictorConfig::default())
}
}
// Unit tests for the height predictor: posterior updates, prediction bounds,
// violation accounting, derived traits, and configuration edge cases.
#[cfg(test)]
mod tests {
use super::*;
// Posterior mean starts at the prior (2.0) and moves toward the data:
// 3.0 after one observation of 4, 10/3 after two.
#[test]
fn unit_posterior_update() {
let config = PredictorConfig {
prior_mean: 2.0,
prior_strength: 1.0,
prior_variance: 4.0,
..Default::default()
};
let mut pred = HeightPredictor::new(config);
assert!((pred.posterior_mean(0) - 2.0).abs() < 1e-10);
pred.observe(0, 4);
assert!((pred.posterior_mean(0) - 3.0).abs() < 1e-10);
pred.observe(0, 4);
assert!((pred.posterior_mean(0) - 10.0 / 3.0).abs() < 1e-10);
}
// Posterior variance shrinks as observations accumulate (0, 10, then 100
// samples alternating between 2 and 4).
#[test]
fn unit_posterior_variance_decreases() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_variance: 4.0,
..Default::default()
});
let var_0 = pred.posterior_variance(0);
assert!(var_0 > 0.0, "prior variance should be positive");
for i in 0..10 {
pred.observe(0, if i % 2 == 0 { 2 } else { 4 });
}
let var_10 = pred.posterior_variance(0);
for i in 0..90 {
pred.observe(0, if i % 2 == 0 { 2 } else { 4 });
}
let var_100 = pred.posterior_variance(0);
assert!(
var_10 < var_0,
"variance should decrease: {var_10} >= {var_0}"
);
assert!(
var_100 < var_10,
"variance should decrease: {var_100} >= {var_10}"
);
}
// With 50 identical observations of 3, the point prediction is 3 and the
// interval contains it.
#[test]
fn unit_conformal_bounds() {
let config = PredictorConfig {
coverage: 0.90,
prior_mean: 3.0,
prior_strength: 1.0,
..Default::default()
};
let mut pred = HeightPredictor::new(config);
for _ in 0..50 {
pred.observe(0, 3);
}
let p = pred.predict(0);
assert_eq!(p.predicted, 3);
assert!(p.lower <= 3);
assert!(p.upper >= 3);
}
// The interval learned from noisy heights (3..=6, drawn from an LCG) is at
// least as wide as the interval learned from constant heights.
#[test]
fn conformal_bounds_widen_with_noise() {
let config = PredictorConfig {
coverage: 0.90,
prior_mean: 5.0,
prior_strength: 1.0,
..Default::default()
};
let mut pred = HeightPredictor::new(config);
for _ in 0..50 {
pred.observe(0, 5);
}
let tight = pred.predict(0);
let mut pred2 = HeightPredictor::new(PredictorConfig {
coverage: 0.90,
prior_mean: 5.0,
prior_strength: 1.0,
..Default::default()
});
let mut seed: u64 = 0xABCD_1234_5678_9ABC;
for _ in 0..50 {
seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
// The top two bits of the LCG state give an offset in 0..=3.
let h = 3 + (seed >> 62) as u16; pred2.observe(0, h);
}
let wide = pred2.predict(0);
assert!(
(wide.upper - wide.lower) >= (tight.upper - tight.lower),
"noisy data should produce wider bounds"
);
}
// After 100 warm-up samples (heights 2..=5 from an LCG), the empirical
// violation rate over 200 further samples stays within alpha + 0.15.
#[test]
fn property_coverage() {
let alpha = 0.10;
let config = PredictorConfig {
coverage: 1.0 - alpha,
prior_mean: 3.0,
prior_strength: 2.0,
prior_variance: 4.0,
calibration_window: 100,
..Default::default()
};
let mut pred = HeightPredictor::new(config);
let mut seed: u64 = 0xDEAD_BEEF_CAFE_0001;
for _ in 0..100 {
seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
let h = 2 + (seed >> 62) as u16; pred.observe(0, h);
}
let mut violations = 0u32;
let test_n = 200;
for _ in 0..test_n {
seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
let h = 2 + (seed >> 62) as u16;
let within = pred.observe(0, h);
if !within {
violations += 1;
}
}
let viol_rate = violations as f64 / test_n as f64;
assert!(
viol_rate <= alpha + 0.15,
"violation rate {viol_rate} exceeds α + tolerance ({alpha} + 0.15)"
);
}
// 500 constant observations of height 1: the prediction stays at 1 and fewer
// than 10 observations fall outside the predicted bounds.
#[test]
fn e2e_scroll_stability() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_mean: 1.0,
prior_strength: 2.0,
default_height: 1,
coverage: 0.90,
..Default::default()
});
let mut corrections = 0u32;
for _ in 0..500 {
let within = pred.observe(0, 1);
if !within {
corrections += 1;
}
}
let p = pred.predict(0);
assert_eq!(p.predicted, 1);
assert!(corrections < 10, "too many corrections: {corrections}");
}
// Observations fed to one category do not affect another category's
// prediction.
#[test]
fn categories_are_independent() {
let mut pred = HeightPredictor::default();
let cat_a = 0;
let cat_b = pred.register_category();
for _ in 0..20 {
pred.observe(cat_a, 1);
pred.observe(cat_b, 5);
}
let pa = pred.predict(cat_a);
let pb = pred.predict(cat_b);
assert_eq!(pa.predicted, 1);
assert!(pb.predicted >= 4 && pb.predicted <= 5);
}
// Without observations, the prediction is `default_height` with zero
// observations reported.
#[test]
fn cold_prediction_uses_default() {
let pred = HeightPredictor::new(PredictorConfig {
default_height: 2,
prior_variance: 1.0,
..Default::default()
});
let p = pred.predict(0);
assert_eq!(p.predicted, 2);
assert_eq!(p.observations, 0);
}
// Two predictors fed the same observation sequence end in the same state.
#[test]
fn deterministic_under_same_observations() {
let run = || {
let mut pred = HeightPredictor::default();
let observations = [1, 2, 1, 3, 1, 2, 1, 1, 4, 1];
for &h in &observations {
pred.observe(0, h);
}
(pred.predict(0).predicted, pred.posterior_mean(0))
};
let (p1, m1) = run();
let (p2, m2) = run();
assert_eq!(p1, p2);
assert!((m1 - m2).abs() < 1e-15);
}
// Timing check: the average cost of `predict` over 100k calls must stay
// under 5 microseconds.
// NOTE(review): this is wall-clock based, so it may be sensitive to build
// profile and machine load.
#[test]
fn perf_prediction_overhead() {
let mut pred = HeightPredictor::default();
for _ in 0..100 {
pred.observe(0, 2);
}
let start = std::time::Instant::now();
let mut _sink = 0u16;
for _ in 0..100_000 {
_sink = _sink.wrapping_add(pred.predict(0).predicted);
}
let elapsed = start.elapsed();
let per_prediction = elapsed / 100_000;
assert!(
per_prediction < std::time::Duration::from_micros(5),
"prediction too slow: {per_prediction:?}"
);
}
// After 50 constant observations, an extreme outlier (20) is reported as out
// of bounds and counted as a violation.
#[test]
fn violation_tracking() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_mean: 5.0,
prior_strength: 100.0, default_height: 5,
coverage: 0.95,
..Default::default()
});
for _ in 0..50 {
pred.observe(0, 5);
}
let within = pred.observe(0, 20);
assert!(!within, "extreme outlier should violate bounds");
assert!(pred.total_violations() > 0);
}
// Pins the values returned by `PredictorConfig::default()`.
#[test]
fn config_default_values() {
let config = PredictorConfig::default();
assert_eq!(config.default_height, 1);
assert!((config.prior_strength - 2.0).abs() < f64::EPSILON);
assert!((config.prior_mean - 1.0).abs() < f64::EPSILON);
assert!((config.prior_variance - 4.0).abs() < f64::EPSILON);
assert!((config.coverage - 0.90).abs() < f64::EPSILON);
assert_eq!(config.calibration_window, 200);
}
// A default predictor starts with one category and zeroed counters.
#[test]
fn default_predictor_has_one_category() {
let pred = HeightPredictor::default();
assert_eq!(pred.category_count(), 1);
assert_eq!(pred.total_measurements(), 0);
assert_eq!(pred.total_violations(), 0);
assert!((pred.violation_rate() - 0.0).abs() < f64::EPSILON);
}
// Predicting for a category id that was never registered yields the cold
// prediction.
#[test]
fn predict_unknown_category_returns_cold() {
let pred = HeightPredictor::default();
let p = pred.predict(999);
assert_eq!(p.predicted, pred.config.default_height);
assert_eq!(p.observations, 0);
}
// Observing category 3 on a fresh predictor creates categories 1 through 3.
#[test]
fn observe_auto_creates_categories() {
let mut pred = HeightPredictor::default();
assert_eq!(pred.category_count(), 1);
pred.observe(3, 5);
assert_eq!(pred.category_count(), 4);
assert_eq!(pred.category_observations(3), 1);
}
// The violation rate is 0.0 before any measurement.
#[test]
fn violation_rate_empty() {
let pred = HeightPredictor::default();
assert!((pred.violation_rate() - 0.0).abs() < f64::EPSILON);
}
// An outlier after 60 constant observations increases the violation count
// and makes the violation rate positive.
#[test]
fn violation_rate_computation() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_mean: 5.0,
prior_strength: 100.0,
default_height: 5,
coverage: 0.95,
..Default::default()
});
for _ in 0..50 {
pred.observe(0, 5);
}
for _ in 0..10 {
pred.observe(0, 5);
}
let before_violations = pred.total_violations();
pred.observe(0, 100);
let after_violations = pred.total_violations();
assert!(after_violations > before_violations);
assert!(pred.violation_rate() > 0.0);
}
// Unknown categories report zero observations.
#[test]
fn category_observations_returns_zero_for_unknown() {
let pred = HeightPredictor::default();
assert_eq!(pred.category_observations(999), 0);
}
// The per-category observation count follows the number of `observe` calls.
#[test]
fn category_observations_tracks_counts() {
let mut pred = HeightPredictor::default();
pred.observe(0, 3);
pred.observe(0, 4);
pred.observe(0, 5);
assert_eq!(pred.category_observations(0), 3);
}
// `posterior_mean` falls back to the prior mean for unknown categories.
#[test]
fn posterior_mean_unknown_returns_prior() {
let pred = HeightPredictor::default();
assert!((pred.posterior_mean(999) - pred.config.prior_mean).abs() < f64::EPSILON);
}
// `posterior_variance` falls back to the raw prior variance for unknown
// categories.
#[test]
fn posterior_variance_unknown_returns_prior() {
let pred = HeightPredictor::default();
assert!((pred.posterior_variance(999) - pred.config.prior_variance).abs() < f64::EPSILON);
}
// `register_category` hands out ids 1, 2, ... after the built-in category 0.
#[test]
fn register_category_returns_sequential_ids() {
let mut pred = HeightPredictor::default();
let id1 = pred.register_category();
let id2 = pred.register_category();
assert_eq!(id1, 1);
assert_eq!(id2, 2);
assert_eq!(pred.category_count(), 3);
}
// After 20 constant observations, one more identical observation is within
// bounds.
#[test]
fn observe_returns_true_for_consistent_data() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_mean: 3.0,
prior_strength: 1.0,
..Default::default()
});
for _ in 0..20 {
pred.observe(0, 3);
}
assert!(pred.observe(0, 3));
}
// Every `observe` call increments the measurement counter.
#[test]
fn total_measurements_increments() {
let mut pred = HeightPredictor::default();
for i in 0..7 {
pred.observe(0, (i + 1) as u16);
}
assert_eq!(pred.total_measurements(), 7);
}
// The bounds bracket the point prediction after constant data.
#[test]
fn prediction_lower_le_predicted_le_upper() {
let mut pred = HeightPredictor::default();
for _ in 0..30 {
pred.observe(0, 3);
}
let p = pred.predict(0);
assert!(p.lower <= p.predicted);
assert!(p.predicted <= p.upper);
}
// Observing a height of 0 still yields a point prediction of at least 1.
#[test]
fn observe_height_zero() {
let mut pred = HeightPredictor::default();
pred.observe(0, 0);
let p = pred.predict(0);
assert!(p.predicted >= 1);
}
// Observing `u16::MAX` is accepted and recorded as one observation.
#[test]
fn observe_height_max_u16() {
let mut pred = HeightPredictor::default();
pred.observe(0, u16::MAX);
let p = pred.predict(0);
assert!(p.predicted > 0);
assert!(p.observations == 1);
}
// Zero prior variance collapses the cold-start interval onto the default
// height.
#[test]
fn cold_prediction_zero_variance() {
let pred = HeightPredictor::new(PredictorConfig {
default_height: 5,
prior_variance: 0.0,
..Default::default()
});
let p = pred.predict(0);
assert_eq!(p.predicted, 5);
assert_eq!(p.lower, 5);
assert_eq!(p.upper, 5);
}
// With a large prior variance, the cold-start lower bound saturates at 0
// instead of underflowing.
#[test]
fn cold_prediction_large_variance() {
let pred = HeightPredictor::new(PredictorConfig {
default_height: 1,
prior_variance: 10000.0,
..Default::default()
});
let p = pred.predict(0);
assert_eq!(p.predicted, 1);
assert_eq!(p.lower, 0); }
// A coverage of 0.0 is accepted and still produces a positive prediction.
#[test]
fn coverage_zero() {
let mut pred = HeightPredictor::new(PredictorConfig {
coverage: 0.0,
prior_mean: 3.0,
prior_strength: 1.0,
..Default::default()
});
for _ in 0..20 {
pred.observe(0, 3);
}
let p = pred.predict(0);
assert!(p.predicted > 0);
}
// A coverage of 1.0 is accepted; the bounds still bracket the prediction
// after mixed data.
#[test]
fn coverage_one() {
let mut pred = HeightPredictor::new(PredictorConfig {
coverage: 1.0,
prior_mean: 3.0,
prior_strength: 1.0,
..Default::default()
});
for _ in 0..20 {
pred.observe(0, 3);
}
for _ in 0..5 {
pred.observe(0, 10);
}
let p = pred.predict(0);
assert!(p.lower <= p.predicted);
assert!(p.predicted <= p.upper);
}
// A calibration window of a single residual still yields a usable
// prediction.
#[test]
fn calibration_window_one() {
let mut pred = HeightPredictor::new(PredictorConfig {
calibration_window: 1,
prior_mean: 3.0,
prior_strength: 1.0,
..Default::default()
});
for _ in 0..10 {
pred.observe(0, 3);
}
let p = pred.predict(0);
assert!(p.predicted > 0);
assert!(p.lower <= p.predicted);
}
// After a single observation, the prediction reports one observation and
// ordered bounds.
#[test]
fn single_observation_uses_wide_bounds() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_mean: 5.0,
prior_strength: 1.0,
prior_variance: 4.0,
..Default::default()
});
pred.observe(0, 5);
let p = pred.predict(0);
assert_eq!(p.observations, 1);
assert!(p.lower <= p.predicted);
assert!(p.predicted <= p.upper);
}
// `PredictorConfig` implements `Clone` and `Debug`.
#[test]
fn predictor_config_clone_and_debug() {
let config = PredictorConfig::default();
let cloned = config.clone();
assert_eq!(cloned.default_height, config.default_height);
let dbg = format!("{:?}", config);
assert!(dbg.contains("PredictorConfig"));
}
// `HeightPrediction` is `Copy` (the original stays usable after assignment)
// and `Debug`.
#[test]
fn height_prediction_copy_and_debug() {
let p = HeightPrediction {
predicted: 3,
lower: 1,
upper: 5,
observations: 10,
};
let p2 = p; assert_eq!(p.predicted, p2.predicted);
assert_eq!(p.lower, p2.lower);
assert_eq!(p.upper, p2.upper);
assert_eq!(p.observations, p2.observations);
let dbg = format!("{:?}", p);
assert!(dbg.contains("HeightPrediction"));
}
// Compile-time check that `HeightPrediction: Clone`; the value below is
// duplicated by a plain copy rather than an explicit `clone()` call.
#[test]
fn height_prediction_clone() {
fn assert_clone<T: Clone>() {}
assert_clone::<HeightPrediction>();
let p = HeightPrediction {
predicted: 2,
lower: 1,
upper: 4,
observations: 5,
};
let cloned = p; assert_eq!(cloned.predicted, 2);
}
// Mutating a cloned predictor leaves the original untouched.
#[test]
fn predictor_clone_independence() {
let mut pred = HeightPredictor::default();
pred.observe(0, 5);
pred.observe(0, 5);
let mut cloned = pred.clone();
cloned.observe(0, 100);
assert_eq!(pred.total_measurements(), 2);
assert_eq!(cloned.total_measurements(), 3);
}
// `HeightPredictor` implements `Debug`.
#[test]
fn predictor_debug() {
let pred = HeightPredictor::default();
let dbg = format!("{:?}", pred);
assert!(dbg.contains("HeightPredictor"));
}
// Two identical observations give a sample variance of zero, hence a
// posterior variance of (approximately) zero.
#[test]
fn posterior_variance_with_two_identical_observations() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_variance: 4.0,
prior_strength: 1.0,
..Default::default()
});
pred.observe(0, 3);
pred.observe(0, 3);
let var = pred.posterior_variance(0);
assert!(var.abs() < 1e-10, "identical obs should give ~0 variance");
}
// With one observation, the prior variance is still used:
// 4.0 / (prior_strength 2 + 1 observation).
#[test]
fn posterior_variance_with_one_observation_uses_prior() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_variance: 4.0,
prior_strength: 2.0,
..Default::default()
});
pred.observe(0, 3);
let var = pred.posterior_variance(0);
assert!((var - 4.0 / 3.0).abs() < 1e-10);
}
// A cold-start outlier must not be counted as a violation.
// NOTE: despite the name, the assertion accepts either outcome of
// `observe` as long as no violation was recorded when it returned false.
#[test]
fn observe_returns_false_for_first_cold_outlier() {
let mut pred = HeightPredictor::new(PredictorConfig {
default_height: 1,
prior_mean: 1.0,
prior_strength: 2.0,
prior_variance: 0.25,
..Default::default()
});
let within = pred.observe(0, 100);
assert!(within || pred.total_violations() == 0);
}
// 100 identical observations that match the prior collapse the interval to
// exactly that height.
#[test]
fn all_same_height_converges_exactly() {
let mut pred = HeightPredictor::new(PredictorConfig {
prior_mean: 3.0,
prior_strength: 1.0,
..Default::default()
});
for _ in 0..100 {
pred.observe(0, 3);
}
let p = pred.predict(0);
assert_eq!(p.predicted, 3);
assert_eq!(p.lower, 3);
assert_eq!(p.upper, 3);
}
// Observing category 10 creates all intermediate categories, which stay
// empty.
#[test]
fn many_categories_auto_created() {
let mut pred = HeightPredictor::default();
pred.observe(10, 5);
assert_eq!(pred.category_count(), 11);
assert_eq!(pred.category_observations(5), 0);
assert_eq!(pred.category_observations(10), 1);
}
// The bounds bracket the point prediction after a mixed sequence of heights.
#[test]
fn prediction_bounds_ordering_after_mixed_data() {
let mut pred = HeightPredictor::default();
for h in [1, 2, 5, 10, 1, 3, 7, 2, 4, 6] {
pred.observe(0, h);
}
let p = pred.predict(0);
assert!(
p.lower <= p.predicted,
"lower={} > predicted={}",
p.lower,
p.predicted
);
assert!(
p.predicted <= p.upper,
"predicted={} > upper={}",
p.predicted,
p.upper
);
}
}