use crate::error::{Result, TimeSeriesError};
/// A single prediction interval: a point forecast together with lower and
/// upper bounds at a given nominal coverage level.
#[derive(Debug, Clone)]
pub struct PredictionInterval {
    /// Lower bound of the interval.
    pub lower: f64,
    /// Point prediction (interval center for the symmetric methods here).
    pub prediction: f64,
    /// Upper bound of the interval.
    pub upper: f64,
    /// Nominal coverage level the interval targets (e.g. 0.95).
    pub coverage_level: f64,
}
/// A batch of prediction intervals produced by one method.
#[derive(Debug, Clone)]
pub struct PredictionIntervals {
    /// One interval per forecast point, in input order.
    pub intervals: Vec<PredictionInterval>,
    /// Name of the producing method (e.g. "SplitConformal", "EnbPI").
    pub method: String,
    /// Measured coverage on held-out data, if it has been evaluated.
    pub empirical_coverage: Option<f64>,
}
/// Configuration for split conformal prediction.
#[derive(Debug, Clone)]
pub struct ConformalConfig {
    /// Target coverage level in (0, 1), e.g. 0.95.
    pub coverage: f64,
    /// Fraction of data reserved for calibration.
    /// NOTE(review): not read by `ConformalPrediction` in this file —
    /// presumably consumed by callers that split the data; confirm.
    pub calibration_fraction: f64,
}
impl Default for ConformalConfig {
fn default() -> Self {
Self {
coverage: 0.95,
calibration_fraction: 0.2,
}
}
}
/// Split conformal predictor: calibrates absolute-residual scores on a
/// held-out set, then emits symmetric intervals around point forecasts.
pub struct ConformalPrediction {
    /// Coverage and calibration settings.
    config: ConformalConfig,
    /// Absolute residual scores stored by the last `calibrate` call.
    calibration_scores: Option<Vec<f64>>,
    /// Conformal quantile of the calibration scores; `None` until calibrated.
    quantile: Option<f64>,
}
impl ConformalPrediction {
    /// Creates a new, uncalibrated conformal predictor.
    pub fn new(config: ConformalConfig) -> Self {
        Self {
            config,
            calibration_scores: None,
            quantile: None,
        }
    }

    /// Calibrates the predictor on held-out (actual, prediction) pairs,
    /// storing the conformal quantile of the absolute residuals.
    ///
    /// # Errors
    /// Returns `DimensionMismatch` if the slices differ in length and
    /// `InsufficientData` if they are empty.
    pub fn calibrate(&mut self, actuals: &[f64], predictions: &[f64]) -> Result<()> {
        if actuals.len() != predictions.len() {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: actuals.len(),
                actual: predictions.len(),
            });
        }
        if actuals.is_empty() {
            return Err(TimeSeriesError::InsufficientData {
                message: "Calibration set is empty".to_string(),
                required: 1,
                actual: 0,
            });
        }
        // Nonconformity scores: absolute residuals on the calibration set.
        let scores: Vec<f64> = actuals
            .iter()
            .zip(predictions.iter())
            .map(|(&a, &p)| (a - p).abs())
            .collect();
        let n = scores.len();
        let alpha = 1.0 - self.config.coverage;
        // Split-conformal rank: ceil((n + 1) * (1 - alpha)).
        //
        // BUG FIX: the rank used to be clamped to n *before* the infinity
        // check (`q_idx.min(n)` followed by `q_idx >= n`), so a rank of
        // exactly n — a perfectly valid order statistic (e.g. n = 19 with
        // coverage = 0.95) — produced infinite-width intervals. Infinity is
        // correct only when the required rank exceeds n. The old code also
        // panicked via `sorted[0 - 1]` when the rank was 0 (coverage <= 0).
        let q_idx = ((n as f64 + 1.0) * (1.0 - alpha)).ceil() as usize;
        let mut sorted = scores.clone();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let quantile = if q_idx > n {
            // Too few calibration points for the requested coverage level.
            f64::INFINITY
        } else if q_idx == 0 {
            // Degenerate request (coverage <= 0): zero-width intervals.
            0.0
        } else {
            sorted[q_idx - 1]
        };
        self.calibration_scores = Some(scores);
        self.quantile = Some(quantile);
        Ok(())
    }

    /// Builds symmetric intervals `pred ± q` around each point forecast,
    /// where `q` is the calibrated conformal quantile.
    ///
    /// # Errors
    /// Returns `ModelNotFitted` if `calibrate` has not been called.
    pub fn predict_intervals(&self, predictions: &[f64]) -> Result<PredictionIntervals> {
        let q = self.quantile.ok_or_else(|| {
            TimeSeriesError::ModelNotFitted("ConformalPrediction not calibrated".to_string())
        })?;
        let intervals: Vec<PredictionInterval> = predictions
            .iter()
            .map(|&pred| PredictionInterval {
                lower: pred - q,
                prediction: pred,
                upper: pred + q,
                coverage_level: self.config.coverage,
            })
            .collect();
        Ok(PredictionIntervals {
            intervals,
            method: "SplitConformal".to_string(),
            empirical_coverage: None,
        })
    }

    /// Convenience wrapper: calibrate on the training pairs, then build
    /// intervals for the test forecasts.
    pub fn fit_predict(
        &mut self,
        train_actuals: &[f64],
        train_preds: &[f64],
        test_preds: &[f64],
    ) -> Result<PredictionIntervals> {
        self.calibrate(train_actuals, train_preds)?;
        self.predict_intervals(test_preds)
    }
}
/// Configuration for EnbPI-style online interval prediction.
#[derive(Debug, Clone)]
pub struct EnbPIConfig {
    /// Target coverage level in (0, 1).
    pub coverage: f64,
    /// Number of bootstrap ensemble members.
    /// NOTE(review): not read by `EnbPI` in this file — confirm it is
    /// consumed by the model-fitting side elsewhere.
    pub n_ensembles: usize,
    /// Fraction controlling how much of the score pool is retained when
    /// old scores are trimmed.
    pub beta: f64,
    /// RNG seed.
    /// NOTE(review): not read by `EnbPI` in this file.
    pub seed: u64,
}
impl Default for EnbPIConfig {
fn default() -> Self {
Self {
coverage: 0.95,
n_ensembles: 50,
beta: 0.95,
seed: 42,
}
}
}
/// Ensemble batch prediction intervals (EnbPI): maintains a pool of
/// absolute-residual scores and updates it as new observations arrive.
pub struct EnbPI {
    /// Coverage / windowing settings.
    config: EnbPIConfig,
    /// Pool of absolute residual scores used to form the quantile.
    scores: Vec<f64>,
    /// NOTE(review): never written or read in this file — candidate for
    /// removal, or wiring up if ensemble predictions were intended.
    ensemble_preds: Option<Vec<f64>>,
}
impl EnbPI {
    /// Creates an EnbPI instance with an empty score pool.
    pub fn new(config: EnbPIConfig) -> Self {
        Self {
            config,
            scores: Vec::new(),
            ensemble_preds: None,
        }
    }
    /// Seeds the score pool with the absolute values of the residuals.
    ///
    /// # Errors
    /// Returns `InsufficientData` if `residuals` is empty.
    pub fn initialize(&mut self, residuals: &[f64]) -> Result<()> {
        if residuals.is_empty() {
            return Err(TimeSeriesError::InsufficientData {
                message: "Residuals for EnbPI initialization cannot be empty".to_string(),
                required: 1,
                actual: 0,
            });
        }
        self.scores = residuals.iter().map(|&r| r.abs()).collect();
        Ok(())
    }
    /// Conformal quantile of the current score pool; infinite when empty.
    fn current_quantile(&self) -> f64 {
        let n = self.scores.len();
        if n == 0 {
            return f64::INFINITY;
        }
        let alpha = 1.0 - self.config.coverage;
        // Split-conformal rank ceil((n + 1) * (1 - alpha)), clamped to n so
        // a small pool yields the largest score rather than infinity.
        let q_idx = ((n as f64 + 1.0) * (1.0 - alpha)).ceil() as usize;
        let q_idx = q_idx.min(n);
        let mut sorted = self.scores.clone();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        // NOTE(review): after the `min(n)` clamp above, `q_idx > sorted.len()`
        // can never hold, so the INFINITY branch is reachable only via
        // q_idx == 0 (i.e. coverage <= 0).
        if q_idx == 0 || q_idx > sorted.len() {
            f64::INFINITY
        } else {
            sorted[q_idx - 1]
        }
    }
    /// Emits a symmetric interval around `prediction` from the current
    /// quantile and, if `actual` is supplied, folds the new absolute
    /// residual into the score pool.
    pub fn predict_and_update(&mut self, prediction: f64, actual: Option<f64>) -> Result<PredictionInterval> {
        let q = self.current_quantile();
        let interval = PredictionInterval {
            lower: prediction - q,
            prediction,
            upper: prediction + q,
            coverage_level: self.config.coverage,
        };
        if let Some(obs) = actual {
            let new_score = (obs - prediction).abs();
            self.scores.push(new_score);
            if self.scores.len() > 1 {
                // Intended: keep roughly the most recent `beta` fraction of
                // scores, dropping the oldest once the pool is double the
                // target size.
                let target_len = (self.scores.len() as f64 * self.config.beta).ceil() as usize;
                let target_len = target_len.max(1);
                // NOTE(review): `target_len` is derived from the *current*
                // length, so `len > 2 * target_len` requires beta < 0.5;
                // with the default beta = 0.95 this branch never fires and
                // the pool grows without bound. Confirm the intended
                // windowing rule before changing it.
                if self.scores.len() > target_len * 2 {
                    let remove_n = self.scores.len() - target_len;
                    self.scores.drain(0..remove_n);
                }
            }
        }
        Ok(interval)
    }
    /// Builds symmetric intervals for a batch of forecasts without updating
    /// the score pool.
    pub fn predict_batch(&self, predictions: &[f64]) -> Result<PredictionIntervals> {
        let q = self.current_quantile();
        let intervals: Vec<PredictionInterval> = predictions.iter().map(|&pred| {
            PredictionInterval {
                lower: pred - q,
                prediction: pred,
                upper: pred + q,
                coverage_level: self.config.coverage,
            }
        }).collect();
        Ok(PredictionIntervals {
            intervals,
            method: "EnbPI".to_string(),
            empirical_coverage: None,
        })
    }
}
/// Residual-resampling strategy used by `BootstrapPI`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BootstrapMethod {
    /// I.i.d. resampling of individual residuals.
    Residual,
    /// Moving-block resampling, preserving short-range autocorrelation.
    Block,
}
/// Configuration for bootstrap prediction intervals.
#[derive(Debug, Clone)]
pub struct BootstrapPIConfig {
    /// Target coverage level in (0, 1).
    pub coverage: f64,
    /// Number of bootstrap replicates drawn per forecast point.
    pub n_bootstrap: usize,
    /// Block length for `BootstrapMethod::Block` (unused for `Residual`).
    pub block_length: usize,
    /// Resampling strategy.
    pub method: BootstrapMethod,
    /// Seed for the internal linear congruential generator.
    pub seed: u64,
}
impl Default for BootstrapPIConfig {
fn default() -> Self {
Self {
coverage: 0.95,
n_bootstrap: 1000,
block_length: 10,
method: BootstrapMethod::Residual,
seed: 42,
}
}
}
/// Percentile-bootstrap prediction intervals driven by stored residuals.
pub struct BootstrapPI {
    /// Coverage, replicate count, and resampling settings.
    config: BootstrapPIConfig,
    /// Residuals to resample; `None` until `set_residuals` is called.
    residuals: Option<Vec<f64>>,
}
impl BootstrapPI {
    /// Creates a bootstrap interval builder with no residuals set yet.
    pub fn new(config: BootstrapPIConfig) -> Self {
        Self {
            config,
            residuals: None,
        }
    }
    /// Stores the residuals that later calls will resample from.
    ///
    /// # Errors
    /// Returns `InsufficientData` if `residuals` is empty.
    pub fn set_residuals(&mut self, residuals: Vec<f64>) -> Result<()> {
        if residuals.is_empty() {
            return Err(TimeSeriesError::InsufficientData {
                message: "Residuals cannot be empty".to_string(),
                required: 1,
                actual: 0,
            });
        }
        self.residuals = Some(residuals);
        Ok(())
    }
    /// Advances the LCG state (Knuth's MMIX multiplier/increment) and maps
    /// the high bits into [0, n).
    /// NOTE(review): `% n` introduces a small modulo bias unless n divides
    /// 2^31 — negligible at these sample sizes, but not uniform.
    fn lcg_sample(state: &mut u64, n: usize) -> usize {
        *state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
        ((*state >> 33) as usize) % n
    }
    /// Draws one moving-block bootstrap replicate of `residuals`: blocks of
    /// `block_len` values starting at pseudo-random offsets, wrapping
    /// circularly, truncated to the original length.
    /// NOTE(review): starts are drawn from [0, n - block_len) when that
    /// range is non-empty, which excludes the final admissible start
    /// position `n - block_len` — confirm whether that is intentional.
    fn block_bootstrap_residuals(residuals: &[f64], block_len: usize, state: &mut u64) -> Vec<f64> {
        let n = residuals.len();
        let n_blocks = (n + block_len - 1) / block_len;
        let mut sample = Vec::with_capacity(n);
        for _ in 0..n_blocks {
            let start = Self::lcg_sample(state, n.saturating_sub(block_len).max(1));
            for j in 0..block_len {
                if sample.len() >= n {
                    break;
                }
                // Wrap around the end of the series (circular block bootstrap).
                sample.push(residuals[(start + j) % n]);
            }
        }
        sample.truncate(n);
        sample
    }
    /// Builds percentile intervals: for each forecast, adds a resampled
    /// residual `n_bootstrap` times and reads off the alpha/2 and
    /// 1 - alpha/2 order statistics of the simulated values.
    ///
    /// # Errors
    /// Returns `ModelNotFitted` if `set_residuals` was never called.
    pub fn predict_intervals(&self, predictions: &[f64]) -> Result<PredictionIntervals> {
        let residuals = self.residuals.as_ref().ok_or_else(|| {
            TimeSeriesError::ModelNotFitted("BootstrapPI residuals not set".to_string())
        })?;
        let n_res = residuals.len();
        if n_res == 0 {
            return Err(TimeSeriesError::InsufficientData {
                message: "Empty residuals".to_string(),
                required: 1,
                actual: 0,
            });
        }
        let alpha = 1.0 - self.config.coverage;
        // Order-statistic indices, truncated toward zero.
        // NOTE(review): for very small `n_bootstrap` (or coverage near 1),
        // `upper_q` can be 0 and `upper_q.min(len) - 1` below would
        // underflow; fine for the defaults, but worth a guard.
        let lower_q = (alpha / 2.0 * self.config.n_bootstrap as f64) as usize;
        let upper_q = ((1.0 - alpha / 2.0) * self.config.n_bootstrap as f64) as usize;
        let mut state = self.config.seed;
        let intervals: Vec<PredictionInterval> = predictions.iter().map(|&pred| {
            let mut boot_vals: Vec<f64> = (0..self.config.n_bootstrap)
                .map(|_| {
                    let sampled_res = match self.config.method {
                        BootstrapMethod::Residual => {
                            let idx = Self::lcg_sample(&mut state, n_res);
                            residuals[idx]
                        }
                        BootstrapMethod::Block => {
                            // NOTE(review): builds a full block-bootstrap
                            // replicate, then keeps a single element of it —
                            // works, but does O(n) work per draw; confirm
                            // whether whole-path simulation was intended.
                            let boot = Self::block_bootstrap_residuals(residuals, self.config.block_length, &mut state);
                            let idx = Self::lcg_sample(&mut state, boot.len().max(1));
                            boot.get(idx).cloned().unwrap_or(0.0)
                        }
                    };
                    pred + sampled_res
                })
                .collect();
            boot_vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            // `max(1)` keeps the lower index valid when lower_q == 0.
            let lower = boot_vals.get(lower_q.max(1) - 1).cloned().unwrap_or(pred);
            let upper = boot_vals.get(upper_q.min(boot_vals.len()) - 1).cloned().unwrap_or(pred);
            PredictionInterval {
                lower,
                prediction: pred,
                upper,
                coverage_level: self.config.coverage,
            }
        }).collect();
        Ok(PredictionIntervals {
            intervals,
            method: format!("Bootstrap({})", match self.config.method {
                BootstrapMethod::Residual => "Residual",
                BootstrapMethod::Block => "Block",
            }),
            empirical_coverage: None,
        })
    }
}
/// Configuration for Gaussian conjugate Bayesian prediction intervals.
#[derive(Debug, Clone)]
pub struct BayesianPIConfig {
    /// Target coverage level in (0, 1).
    pub coverage: f64,
    /// Prior variance of the residual mean (normal prior centred at 0).
    pub prior_var: f64,
    /// NOTE(review): not read by `BayesianPI` in this file — the intervals
    /// are computed analytically, not by sampling.
    pub n_samples: usize,
    /// NOTE(review): not read by `BayesianPI` in this file.
    pub seed: u64,
}
impl Default for BayesianPIConfig {
fn default() -> Self {
Self {
coverage: 0.95,
prior_var: 1.0,
n_samples: 10000,
seed: 42,
}
}
}
/// Analytic Bayesian prediction intervals from a normal-normal conjugate
/// model over the residual mean; all fields are `None` until `fit` is called.
pub struct BayesianPI {
    /// Coverage and prior settings.
    config: BayesianPIConfig,
    /// Estimated residual (noise) variance.
    sigma_sq: Option<f64>,
    /// Posterior mean of the residual bias.
    posterior_mean: Option<f64>,
    /// Posterior variance of the residual bias.
    posterior_var: Option<f64>,
}
impl BayesianPI {
    /// Creates an unfitted Bayesian interval builder.
    pub fn new(config: BayesianPIConfig) -> Self {
        Self {
            config,
            sigma_sq: None,
            posterior_mean: None,
            posterior_var: None,
        }
    }

    /// Fits a normal-normal conjugate model to the residuals: the noise
    /// variance is estimated from the sample, and the posterior over the
    /// residual mean (prior N(0, prior_var)) is updated in closed form.
    ///
    /// # Errors
    /// Returns `InsufficientData` with fewer than 2 residuals (the unbiased
    /// sample variance needs n - 1 >= 1).
    pub fn fit(&mut self, residuals: &[f64]) -> Result<()> {
        let n = residuals.len();
        if n < 2 {
            return Err(TimeSeriesError::InsufficientData {
                message: "Need at least 2 residuals for Bayesian PI".to_string(),
                required: 2,
                actual: n,
            });
        }
        // Sum once; both the sample mean and the posterior-mean update need
        // it (previously the residuals were summed twice).
        let sum_res: f64 = residuals.iter().sum();
        let mean = sum_res / n as f64;
        // Unbiased sample variance, floored to keep precisions finite.
        let var: f64 = residuals.iter().map(|&r| (r - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
        let sigma_sq = var.max(1e-15);
        let tau_sq = self.config.prior_var;
        let mu_0 = 0.0_f64; // prior mean of the residual bias
        // Conjugate update: posterior precision = prior + data precision.
        let tau_n_sq = 1.0 / (1.0 / tau_sq + n as f64 / sigma_sq);
        let mu_n = tau_n_sq * (mu_0 / tau_sq + sum_res / sigma_sq);
        self.sigma_sq = Some(sigma_sq);
        self.posterior_mean = Some(mu_n);
        self.posterior_var = Some(tau_n_sq);
        Ok(())
    }

    /// Standard normal quantile via the Abramowitz & Stegun 26.2.23
    /// rational approximation (absolute error < 4.5e-4).
    fn normal_quantile(p: f64) -> f64 {
        let p = p.clamp(1e-10, 1.0 - 1e-10);
        let t = (-2.0 * (p.min(1.0 - p)).ln()).sqrt();
        let c = [2.515517_f64, 0.802853, 0.010328];
        let d = [1.432788_f64, 0.189269, 0.001308];
        let num = c[0] + c[1] * t + c[2] * t * t;
        let den = 1.0 + d[0] * t + d[1] * t * t + d[2] * t * t * t;
        let z = t - num / den;
        // The approximation yields the upper-tail magnitude; mirror the sign
        // for p < 0.5.
        if p >= 0.5 { z } else { -z }
    }

    /// Builds posterior-predictive intervals: each point forecast is shifted
    /// by the posterior mean bias and widened by the predictive standard
    /// deviation sqrt(sigma_sq + posterior_var).
    ///
    /// # Errors
    /// Returns `ModelNotFitted` if `fit` has not been called.
    pub fn predict_intervals(&self, predictions: &[f64]) -> Result<PredictionIntervals> {
        let sigma_sq = self.sigma_sq.ok_or_else(|| {
            TimeSeriesError::ModelNotFitted("BayesianPI not fitted".to_string())
        })?;
        let posterior_mean = self.posterior_mean.ok_or_else(|| {
            TimeSeriesError::ModelNotFitted("BayesianPI not fitted".to_string())
        })?;
        let posterior_var = self.posterior_var.ok_or_else(|| {
            TimeSeriesError::ModelNotFitted("BayesianPI not fitted".to_string())
        })?;
        // Posterior predictive variance: noise plus parameter uncertainty.
        let pred_var = sigma_sq + posterior_var;
        let pred_std = pred_var.sqrt();
        let alpha = 1.0 - self.config.coverage;
        let z_lower = Self::normal_quantile(alpha / 2.0);
        let z_upper = Self::normal_quantile(1.0 - alpha / 2.0);
        let intervals: Vec<PredictionInterval> = predictions
            .iter()
            .map(|&point_pred| {
                let pred_center = point_pred + posterior_mean;
                PredictionInterval {
                    lower: pred_center + z_lower * pred_std,
                    prediction: pred_center,
                    upper: pred_center + z_upper * pred_std,
                    coverage_level: self.config.coverage,
                }
            })
            .collect();
        Ok(PredictionIntervals {
            intervals,
            method: "BayesianPosteriorPredictive".to_string(),
            empirical_coverage: None,
        })
    }
}
/// Outcome of an interval-coverage evaluation.
#[derive(Debug, Clone)]
pub struct CoverageTestResult {
    /// Fraction of actuals that fell inside their interval.
    pub empirical_coverage: f64,
    /// Coverage the intervals claimed (taken from the first interval).
    pub nominal_coverage: f64,
    /// Number of (actual, interval) pairs evaluated.
    pub n_observations: usize,
    /// Number of actuals inside their interval.
    pub n_covered: usize,
    /// Signed gap `empirical - nominal` (positive means over-coverage).
    pub coverage_deficit: f64,
    /// Mean Winkler (interval) score: width plus out-of-interval penalty.
    pub winkler_score: f64,
    /// Mean interval width.
    pub mean_width: f64,
    /// True when |coverage_deficit| <= tolerance.
    pub passes: bool,
    /// Tolerance used for the pass/fail verdict.
    pub tolerance: f64,
}
/// Evaluates prediction intervals against realized values.
pub struct CoverageTest {
    /// Maximum allowed |empirical - nominal| coverage gap to pass.
    pub tolerance: f64,
}
impl CoverageTest {
    /// Creates a coverage tester with the given pass/fail tolerance.
    pub fn new(tolerance: f64) -> Self {
        Self { tolerance }
    }

    /// Scores the intervals against the realized values: empirical coverage,
    /// mean width, mean Winkler score, and a pass/fail verdict comparing the
    /// coverage gap against the tolerance.
    ///
    /// # Errors
    /// Returns `InsufficientData` when `actuals` is empty and
    /// `DimensionMismatch` when the interval count differs.
    pub fn evaluate(
        &self,
        actuals: &[f64],
        intervals: &PredictionIntervals,
    ) -> Result<CoverageTestResult> {
        let n = actuals.len();
        if n == 0 {
            return Err(TimeSeriesError::InsufficientData {
                message: "No observations to evaluate".to_string(),
                required: 1,
                actual: 0,
            });
        }
        if intervals.intervals.len() != n {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: n,
                actual: intervals.intervals.len(),
            });
        }
        // Nominal level is read from the first interval (all intervals from
        // one builder share it).
        let nominal_coverage = intervals
            .intervals
            .first()
            .map(|pi| pi.coverage_level)
            .unwrap_or(0.95);
        let alpha = 1.0 - nominal_coverage;
        // Single pass: count hits and accumulate widths / Winkler penalties.
        let mut n_covered = 0usize;
        let mut winkler_total = 0.0_f64;
        let mut total_width = 0.0_f64;
        for (&obs, pi) in actuals.iter().zip(intervals.intervals.iter()) {
            if obs >= pi.lower && obs <= pi.upper {
                n_covered += 1;
            }
            let width = pi.upper - pi.lower;
            total_width += width;
            // Winkler score: interval width plus a 2/alpha-scaled penalty for
            // the distance by which the observation escapes the interval.
            let penalty = if obs < pi.lower {
                2.0 / alpha * (pi.lower - obs)
            } else if obs > pi.upper {
                2.0 / alpha * (obs - pi.upper)
            } else {
                0.0
            };
            winkler_total += width + penalty;
        }
        let empirical_coverage = n_covered as f64 / n as f64;
        let winkler_score = winkler_total / n as f64;
        let mean_width = total_width / n as f64;
        let coverage_deficit = empirical_coverage - nominal_coverage;
        let passes = coverage_deficit.abs() <= self.tolerance;
        Ok(CoverageTestResult {
            empirical_coverage,
            nominal_coverage,
            n_observations: n,
            n_covered,
            coverage_deficit,
            winkler_score,
            mean_width,
            passes,
            tolerance: self.tolerance,
        })
    }

    /// Returns the hit rate over every length-`window` sliding window,
    /// yielding `n - window + 1` values.
    ///
    /// # Errors
    /// Returns `InsufficientData` when the series is shorter than `window`
    /// and `DimensionMismatch` when the interval count differs.
    pub fn rolling_coverage(
        &self,
        actuals: &[f64],
        intervals: &PredictionIntervals,
        window: usize,
    ) -> Result<Vec<f64>> {
        let n = actuals.len();
        if n < window {
            return Err(TimeSeriesError::InsufficientData {
                message: "Data too short for rolling coverage".to_string(),
                required: window,
                actual: n,
            });
        }
        if intervals.intervals.len() != n {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: n,
                actual: intervals.intervals.len(),
            });
        }
        // Precompute per-point hit flags, then average over each window.
        let covered: Vec<bool> = actuals
            .iter()
            .zip(intervals.intervals.iter())
            .map(|(&a, pi)| a >= pi.lower && a <= pi.upper)
            .collect();
        let rolling: Vec<f64> = (0..=n - window)
            .map(|start| {
                let hits = covered[start..start + window].iter().filter(|&&c| c).count();
                hits as f64 / window as f64
            })
            .collect();
        Ok(rolling)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Deterministic noisy-sine fixture: returns (actuals, predictions)
    /// where predictions are the clean sine and actuals add uniform noise
    /// drawn from a fixed-seed LCG (reproducible across runs).
    fn make_noisy_sine(n: usize, noise_std: f64) -> (Vec<f64>, Vec<f64>) {
        let mut state = 42_u64;
        let actuals: Vec<f64> = (0..n).map(|i| {
            state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
            // Map the LCG high bits to a uniform in [-noise_std, noise_std).
            let noise = ((state >> 11) as f64 / (1u64 << 53) as f64 - 0.5) * 2.0 * noise_std;
            (2.0 * std::f64::consts::PI * i as f64 / 20.0).sin() + noise
        }).collect();
        let predictions: Vec<f64> = (0..n)
            .map(|i| (2.0 * std::f64::consts::PI * i as f64 / 20.0).sin())
            .collect();
        (actuals, predictions)
    }
    /// Split conformal: calibrate on 80 points, check the 20 test intervals
    /// are well-formed and carry the requested coverage level.
    #[test]
    fn test_conformal_prediction_calibrate_predict() {
        let (actuals, preds) = make_noisy_sine(100, 0.5);
        let cal_actuals = &actuals[..80];
        let cal_preds = &preds[..80];
        let test_preds = &preds[80..];
        let mut cp = ConformalPrediction::new(ConformalConfig { coverage: 0.9, ..Default::default() });
        cp.calibrate(cal_actuals, cal_preds).expect("Calibration failed");
        let intervals = cp.predict_intervals(test_preds).expect("Prediction failed");
        assert_eq!(intervals.intervals.len(), 20);
        for pi in &intervals.intervals {
            assert!(pi.lower <= pi.upper, "Lower bound exceeds upper bound");
            assert!((pi.coverage_level - 0.9).abs() < 1e-10);
        }
    }
    /// EnbPI: initialize from training residuals, then stream 20 points
    /// through predict-and-update, checking interval well-formedness.
    #[test]
    fn test_enbpi_predict_update() {
        let (actuals, preds) = make_noisy_sine(100, 0.3);
        let train_residuals: Vec<f64> = actuals[..80].iter().zip(preds[..80].iter())
            .map(|(&a, &p)| a - p)
            .collect();
        let mut enbpi = EnbPI::new(EnbPIConfig::default());
        enbpi.initialize(&train_residuals).expect("Initialize failed");
        for i in 80..100 {
            let pi = enbpi.predict_and_update(preds[i], Some(actuals[i])).expect("Predict failed");
            assert!(pi.lower <= pi.upper);
        }
    }
    /// Bootstrap (i.i.d. residual resampling): intervals exist and are
    /// well-formed for a handful of forecast points.
    #[test]
    fn test_bootstrap_pi_residual() {
        let (actuals, preds) = make_noisy_sine(100, 0.5);
        let residuals: Vec<f64> = actuals.iter().zip(preds.iter()).map(|(&a, &p)| a - p).collect();
        let mut bpi = BootstrapPI::new(BootstrapPIConfig {
            n_bootstrap: 100,
            ..Default::default()
        });
        bpi.set_residuals(residuals).expect("Set residuals failed");
        let test_preds = vec![0.0, 0.5, 1.0, 0.5];
        let intervals = bpi.predict_intervals(&test_preds).expect("Prediction failed");
        assert_eq!(intervals.intervals.len(), 4);
        for pi in &intervals.intervals {
            assert!(pi.lower <= pi.upper, "Lower bound exceeds upper bound");
        }
    }
    /// Bootstrap (moving-block variant): smoke test that the block path runs.
    #[test]
    fn test_bootstrap_pi_block() {
        let (actuals, preds) = make_noisy_sine(100, 0.5);
        let residuals: Vec<f64> = actuals.iter().zip(preds.iter()).map(|(&a, &p)| a - p).collect();
        let mut bpi = BootstrapPI::new(BootstrapPIConfig {
            method: BootstrapMethod::Block,
            block_length: 5,
            n_bootstrap: 100,
            ..Default::default()
        });
        bpi.set_residuals(residuals).expect("Set residuals failed");
        let test_preds = vec![0.0, 0.5];
        let intervals = bpi.predict_intervals(&test_preds).expect("Prediction failed");
        assert_eq!(intervals.intervals.len(), 2);
    }
    /// Bayesian conjugate intervals: fit on residuals, check well-formedness.
    #[test]
    fn test_bayesian_pi() {
        let (actuals, preds) = make_noisy_sine(100, 0.5);
        let residuals: Vec<f64> = actuals.iter().zip(preds.iter()).map(|(&a, &p)| a - p).collect();
        let mut bpi = BayesianPI::new(BayesianPIConfig::default());
        bpi.fit(&residuals).expect("Fit failed");
        let test_preds = &preds[80..100];
        let intervals = bpi.predict_intervals(test_preds).expect("Prediction failed");
        assert_eq!(intervals.intervals.len(), 20);
        for pi in &intervals.intervals {
            assert!(pi.lower <= pi.upper, "Lower bound exceeds upper bound");
        }
    }
    /// Coverage evaluation: sanity-check ranges of every reported metric.
    #[test]
    fn test_coverage_test_evaluate() {
        let (actuals, preds) = make_noisy_sine(100, 0.3);
        let residuals: Vec<f64> = actuals.iter().zip(preds.iter()).map(|(&a, &p)| a - p).collect();
        let mut bpi = BootstrapPI::new(BootstrapPIConfig { n_bootstrap: 200, ..Default::default() });
        bpi.set_residuals(residuals).expect("Set residuals failed");
        let test_actuals = &actuals[..50];
        let test_preds_slice = &preds[..50];
        let intervals = bpi.predict_intervals(test_preds_slice).expect("Prediction failed");
        let ct = CoverageTest::new(0.15);
        let result = ct.evaluate(test_actuals, &intervals).expect("Coverage test failed");
        assert!(result.empirical_coverage >= 0.0 && result.empirical_coverage <= 1.0);
        assert_eq!(result.n_observations, 50);
        assert!(result.mean_width >= 0.0);
        assert!(result.winkler_score >= 0.0);
    }
    /// Rolling coverage: n - window + 1 = 81 windows, each a valid fraction.
    #[test]
    fn test_rolling_coverage() {
        let (actuals, preds) = make_noisy_sine(100, 0.3);
        let residuals: Vec<f64> = actuals.iter().zip(preds.iter()).map(|(&a, &p)| a - p).collect();
        let mut bpi = BootstrapPI::new(BootstrapPIConfig { n_bootstrap: 100, ..Default::default() });
        bpi.set_residuals(residuals).expect("Set residuals failed");
        let intervals = bpi.predict_intervals(&preds).expect("Prediction failed");
        let ct = CoverageTest::new(0.1);
        let rolling = ct.rolling_coverage(&actuals, &intervals, 20).expect("Rolling coverage failed");
        assert_eq!(rolling.len(), 81);
        for &cov in &rolling {
            assert!(cov >= 0.0 && cov <= 1.0);
        }
    }
}