use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::Array1;
use super::analysis::{peaks_over_threshold, PotResult};
use super::distributions::GeneralizedExtremeValue;
use super::estimation::gev_fit_mle;
pub fn value_at_risk(data: &[f64], confidence: f64) -> StatsResult<f64> {
if data.len() < 2 {
return Err(StatsError::InsufficientData(
"value_at_risk requires at least 2 observations".into(),
));
}
if confidence <= 0.0 || confidence >= 1.0 {
return Err(StatsError::InvalidArgument(format!(
"confidence must be in (0, 1), got {confidence}"
)));
}
let mut sorted = data.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let idx = confidence * (n as f64 - 1.0);
let lo = idx.floor() as usize;
let hi = (lo + 1).min(n - 1);
let frac = idx - lo as f64;
Ok(sorted[lo] * (1.0 - frac) + sorted[hi] * frac)
}
pub fn conditional_var(data: &[f64], confidence: f64) -> StatsResult<f64> {
if data.len() < 2 {
return Err(StatsError::InsufficientData(
"conditional_var requires at least 2 observations".into(),
));
}
if confidence <= 0.0 || confidence >= 1.0 {
return Err(StatsError::InvalidArgument(format!(
"confidence must be in (0, 1), got {confidence}"
)));
}
let var_p = value_at_risk(data, confidence)?;
let tail: Vec<f64> = data.iter().copied().filter(|&x| x > var_p).collect();
if tail.is_empty() {
return Ok(var_p);
}
let mean_tail: f64 = tail.iter().sum::<f64>() / tail.len() as f64;
Ok(mean_tail)
}
pub fn tail_index_hill(data: &[f64], k: usize) -> StatsResult<f64> {
let n = data.len();
if k == 0 {
return Err(StatsError::InvalidArgument("k must be >= 1".into()));
}
if n < k + 1 {
return Err(StatsError::InsufficientData(format!(
"tail_index_hill requires at least k+1 = {} observations, got {n}",
k + 1
)));
}
let arr = Array1::from(data.to_vec());
super::analysis::hill_estimator(arr.view(), k)
}
pub fn pot_analysis(data: &[f64], threshold: f64) -> StatsResult<PotResult> {
if !threshold.is_finite() {
return Err(StatsError::InvalidArgument(
"threshold must be a finite number".into(),
));
}
let arr = Array1::from(data.to_vec());
peaks_over_threshold(arr.view(), threshold, &[10.0, 25.0, 50.0, 100.0, 200.0])
}
#[derive(Debug, Clone)]
pub struct BlockMaximaFit {
pub distribution: GeneralizedExtremeValue,
pub log_likelihood: f64,
pub aic: f64,
pub bic: f64,
pub n_obs: usize,
}
impl BlockMaximaFit {
pub fn return_level(&self, return_period: f64) -> StatsResult<f64> {
self.distribution.return_level(return_period)
}
}
pub fn fit_block_maxima(data: &[f64], block_size: usize) -> StatsResult<BlockMaximaFit> {
if block_size == 0 {
return Err(StatsError::InvalidArgument(
"block_size must be >= 1".into(),
));
}
let n = data.len();
let n_blocks = n / block_size;
if n_blocks < 2 {
return Err(StatsError::InsufficientData(format!(
"fit_block_maxima: need at least 2 complete blocks (block_size={block_size}), \
data length {n} gives {n_blocks} block(s)"
)));
}
let maxima: Vec<f64> = (0..n_blocks)
.map(|b| {
let start = b * block_size;
let end = start + block_size;
data[start..end]
.iter()
.copied()
.fold(f64::NEG_INFINITY, f64::max)
})
.collect();
if maxima.len() < 3 {
return Err(StatsError::InsufficientData(
"fit_block_maxima: need at least 3 block maxima for GEV fitting".into(),
));
}
let arr = Array1::from(maxima.clone());
let distribution = gev_fit_mle(arr.view())?;
let n_obs = maxima.len();
let ll: f64 = maxima
.iter()
.map(|&x| {
let p = distribution.pdf(x);
if p > 0.0 && p.is_finite() {
p.ln()
} else {
-1e10
}
})
.sum();
let k = 3.0_f64;
let nf = n_obs as f64;
Ok(BlockMaximaFit {
distribution,
log_likelihood: ll,
aic: -2.0 * ll + 2.0 * k,
bic: -2.0 * ll + k * nf.ln(),
n_obs,
})
}
pub fn return_level_ci(data: &[f64], return_period: f64, alpha: f64) -> StatsResult<(f64, f64)> {
if return_period <= 1.0 {
return Err(StatsError::InvalidArgument(format!(
"return_period must be > 1, got {return_period}"
)));
}
if alpha <= 0.0 || alpha >= 1.0 {
return Err(StatsError::InvalidArgument(format!(
"alpha must be in (0, 1), got {alpha}"
)));
}
if data.len() < 3 {
return Err(StatsError::InsufficientData(
"return_level_ci requires at least 3 observations".into(),
));
}
let arr = Array1::from(data.to_vec());
let gev = gev_fit_mle(arr.view())?;
let rl = gev.return_level(return_period)?;
let n = data.len() as f64;
let se = delta_method_se_gev(&gev, return_period, n)?;
let z = normal_quantile(1.0 - alpha / 2.0);
Ok((rl - z * se, rl + z * se))
}
fn delta_method_se_gev(
gev: &GeneralizedExtremeValue,
return_period: f64,
n: f64,
) -> StatsResult<f64> {
let sigma = gev.sigma;
let xi = gev.xi;
let p = 1.0 - 1.0 / return_period;
let y = -p.ln().max(1e-300); let xi_thresh = 1e-8_f64;
let (grad_mu, grad_sigma, grad_xi) = if xi.abs() < xi_thresh {
let ln_y = y.ln();
(1.0_f64, -ln_y, 0.5 * ln_y * ln_y * sigma)
} else {
let y_xi = y.powf(-xi);
let factor = (y_xi - 1.0) / xi;
let grad_x = sigma * (y_xi * (-y.ln()) / xi - factor) / xi;
(1.0_f64, factor, grad_x)
};
let var_mu = sigma * sigma / n;
let var_sigma = sigma * sigma / n;
let var_xi = (1.0 + xi).powi(2).max(0.01) / n;
let var_rl = grad_mu * grad_mu * var_mu
+ grad_sigma * grad_sigma * var_sigma
+ grad_xi * grad_xi * var_xi;
Ok(var_rl.sqrt().max(0.0))
}
fn normal_quantile(p: f64) -> f64 {
if p <= 0.0 {
return f64::NEG_INFINITY;
}
if p >= 1.0 {
return f64::INFINITY;
}
if (p - 0.5).abs() < 0.42 {
let q = p - 0.5;
let r = q * q;
q * ((2.515_517 + 0.802_853 * r + 0.010_328 * r * r)
/ (1.0 + 1.432_788 * r + 0.189_269 * r * r + 0.001_308 * r * r * r))
} else {
let pp = if p < 0.5 { p } else { 1.0 - p };
let t = (-2.0 * pp.ln()).sqrt();
let r = t;
let r2 = r * r;
let c = 2.515_517 + 0.802_853 * r + 0.010_328 * r2;
let d = 1.0 + 1.432_788 * r + 0.189_269 * r2 + 0.001_308 * r2 * r;
let val = t - c / d;
if p < 0.5 {
-val
} else {
val
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn approx_eq(a: f64, b: f64, tol: f64) -> bool {
(a - b).abs() < tol
}
#[test]
fn test_value_at_risk_basic() {
let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
let var95 = value_at_risk(&data, 0.95).expect("var");
assert!(var95 >= 94.0 && var95 <= 96.0, "Expected ~95, got {var95}");
}
#[test]
fn test_value_at_risk_monotone() {
let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
let var50 = value_at_risk(&data, 0.50).expect("var50");
let var90 = value_at_risk(&data, 0.90).expect("var90");
let var99 = value_at_risk(&data, 0.99).expect("var99");
assert!(var50 <= var90, "VaR must be monotone: {var50} <= {var90}");
assert!(var90 <= var99, "VaR must be monotone: {var90} <= {var99}");
}
#[test]
fn test_value_at_risk_invalid_confidence() {
let data = vec![1.0, 2.0, 3.0];
assert!(value_at_risk(&data, 0.0).is_err());
assert!(value_at_risk(&data, 1.0).is_err());
assert!(value_at_risk(&data, -0.1).is_err());
assert!(value_at_risk(&data, 1.1).is_err());
}
#[test]
fn test_value_at_risk_insufficient_data() {
assert!(value_at_risk(&[1.0], 0.95).is_err());
}
#[test]
fn test_conditional_var_ge_var() {
let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
let var95 = value_at_risk(&data, 0.95).expect("var");
let cvar95 = conditional_var(&data, 0.95).expect("cvar");
assert!(cvar95 >= var95 - 1e-10, "CVaR={cvar95} >= VaR={var95}");
}
#[test]
fn test_conditional_var_uniform_all_same() {
let data = vec![5.0; 20];
let cvar = conditional_var(&data, 0.95).expect("cvar");
assert!(approx_eq(cvar, 5.0, 1e-10), "cvar={cvar}");
}
#[test]
fn test_conditional_var_invalid() {
let data = vec![1.0, 2.0, 3.0];
assert!(conditional_var(&data, 0.0).is_err());
assert!(conditional_var(&data, 1.0).is_err());
}
#[test]
fn test_tail_index_hill_positive() {
let data: Vec<f64> = (1..=200)
.map(|i| (i as f64 / 201.0_f64).powf(-0.5))
.collect();
let xi = tail_index_hill(&data, 20).expect("hill");
assert!(xi > 0.0, "xi should be positive for Pareto, got {xi}");
}
#[test]
fn test_tail_index_hill_invalid_k() {
assert!(tail_index_hill(&[1.0, 2.0, 3.0], 0).is_err());
}
#[test]
fn test_tail_index_hill_insufficient_data() {
assert!(tail_index_hill(&[1.0, 2.0], 5).is_err());
}
#[test]
fn test_fit_block_maxima_basic() {
let data: Vec<f64> = (0..200).map(|i| (i % 10) as f64 + 1.0).collect();
let fit = fit_block_maxima(&data, 10).expect("fit block maxima");
assert!(fit.distribution.sigma > 0.0);
assert!(fit.n_obs == 20);
}
#[test]
fn test_fit_block_maxima_invalid_block_size() {
let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
assert!(fit_block_maxima(&data, 0).is_err());
}
#[test]
fn test_fit_block_maxima_too_few_blocks() {
let data: Vec<f64> = (0..10).map(|i| i as f64).collect();
assert!(fit_block_maxima(&data, 9).is_err());
}
#[test]
fn test_return_level_ci_valid() {
let data: Vec<f64> = (1..=50).map(|i| i as f64).collect();
let (lo, hi) = return_level_ci(&data, 10.0, 0.05).expect("CI");
assert!(lo <= hi + 1e-9, "lower={lo} should be <= upper={hi}");
assert!(lo.is_finite(), "lower bound must be finite");
assert!(hi.is_finite(), "upper bound must be finite");
}
#[test]
fn test_return_level_ci_wider_for_smaller_n() {
let data_small: Vec<f64> = (1..=20).map(|i| i as f64).collect();
let data_large: Vec<f64> = (1..=200).map(|i| i as f64).collect();
let (lo_s, hi_s) = return_level_ci(&data_small, 10.0, 0.05).expect("CI small");
let (lo_l, hi_l) = return_level_ci(&data_large, 10.0, 0.05).expect("CI large");
let width_small = hi_s - lo_s;
let width_large = hi_l - lo_l;
assert!(
width_small >= width_large * 0.5,
"Small-n CI ({width_small}) should be at least comparable to large-n CI ({width_large})"
);
}
#[test]
fn test_return_level_ci_invalid_period() {
let data: Vec<f64> = (1..=50).map(|i| i as f64).collect();
assert!(return_level_ci(&data, 0.5, 0.05).is_err());
assert!(return_level_ci(&data, 1.0, 0.05).is_err());
}
#[test]
fn test_return_level_ci_invalid_alpha() {
let data: Vec<f64> = (1..=50).map(|i| i as f64).collect();
assert!(return_level_ci(&data, 10.0, 0.0).is_err());
assert!(return_level_ci(&data, 10.0, 1.0).is_err());
}
#[test]
fn test_pot_analysis_basic() {
let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
let result = pot_analysis(&data, 50.0);
assert!(result.is_ok(), "pot_analysis failed: {:?}", result.err());
}
#[test]
fn test_pot_analysis_non_finite_threshold() {
let data = vec![1.0, 2.0, 3.0];
assert!(pot_analysis(&data, f64::INFINITY).is_err());
assert!(pot_analysis(&data, f64::NAN).is_err());
}
}