use crate::error::{StatsError, StatsResult};
use crate::sampling::SampleableDistribution;
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::random::prelude::*;
use scirs2_core::random::Uniform as RandUniform;
pub struct GeneralizedPareto<F: Float> {
pub xi: F,
pub sigma: F,
pub mu: F,
rand_distr: RandUniform<f64>,
}
impl<F: Float + NumCast + std::fmt::Display> GeneralizedPareto<F> {
pub fn new(xi: F, sigma: F, mu: F) -> StatsResult<Self> {
if sigma <= F::zero() {
return Err(StatsError::DomainError(
"Scale parameter sigma must be positive".to_string(),
));
}
let rand_distr = RandUniform::new(0.0_f64, 1.0_f64).map_err(|_| {
StatsError::ComputationError(
"Failed to create uniform distribution for GPD sampling".to_string(),
)
})?;
Ok(Self {
xi,
sigma,
mu,
rand_distr,
})
}
pub fn upper_bound(&self) -> Option<F> {
if self.xi < F::zero() {
Some(self.mu - self.sigma / self.xi)
} else {
None
}
}
pub fn pdf(&self, x: F) -> F {
let z = (x - self.mu) / self.sigma;
if z < F::zero() {
return F::zero();
}
if let Some(ub) = self.upper_bound() {
if x > ub {
return F::zero();
}
}
let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
let abs_xi: f64 = xi_f64.abs();
if abs_xi < 1e-10 {
let exp_term = (-z).exp();
exp_term / self.sigma
} else {
let one = F::one();
let base = one + self.xi * z;
if base <= F::zero() {
return F::zero();
}
let exponent = -(one / self.xi + one);
base.powf(exponent) / self.sigma
}
}
pub fn cdf(&self, x: F) -> F {
let z = (x - self.mu) / self.sigma;
if z <= F::zero() {
return F::zero();
}
if let Some(ub) = self.upper_bound() {
if x >= ub {
return F::one();
}
}
let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
let abs_xi: f64 = xi_f64.abs();
if abs_xi < 1e-10 {
F::one() - (-z).exp()
} else {
let one = F::one();
let base = one + self.xi * z;
if base <= F::zero() {
return F::one();
}
let exponent = -one / self.xi;
F::one() - base.powf(exponent)
}
}
pub fn sf(&self, x: F) -> F {
F::one() - self.cdf(x)
}
pub fn ppf(&self, q: F) -> StatsResult<F> {
if q < F::zero() || q >= F::one() {
return Err(StatsError::DomainError(
"Quantile q must be in [0, 1)".to_string(),
));
}
let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
let abs_xi: f64 = xi_f64.abs();
let x = if abs_xi < 1e-10 {
self.mu + self.sigma * (-(F::one() - q).ln())
} else {
let one = F::one();
let base = (one - q).powf(-self.xi) - one;
self.mu + self.sigma * base / self.xi
};
Ok(x)
}
pub fn logpdf(&self, x: F) -> F {
let pdf_val = self.pdf(x);
if pdf_val <= F::zero() {
F::from(f64::NEG_INFINITY).unwrap_or(F::zero())
} else {
pdf_val.ln()
}
}
pub fn mean(&self) -> Option<F> {
let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
if xi_f64 >= 1.0 {
None
} else {
let one = F::one();
Some(self.mu + self.sigma / (one - self.xi))
}
}
pub fn variance(&self) -> Option<F> {
let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
if xi_f64 >= 0.5 {
None
} else {
let one = F::one();
let two = F::from(2.0).unwrap_or(one + one);
let numer = self.sigma * self.sigma;
let denom = (one - self.xi) * (one - self.xi) * (one - two * self.xi);
Some(numer / denom)
}
}
pub fn rvs<R: Rng + ?Sized>(&self, n: usize, rng: &mut R) -> StatsResult<Vec<F>> {
let mut samples = Vec::with_capacity(n);
for _ in 0..n {
let u: f64 = self.rand_distr.sample(rng);
let q = F::from(u).ok_or_else(|| {
StatsError::ComputationError("Failed to convert uniform sample to F".to_string())
})?;
let x = self.ppf(q)?;
samples.push(x);
}
Ok(samples)
}
pub fn fit_lmoments(data: &[F], threshold: F) -> StatsResult<(F, F)>
where
F: std::ops::Sub<Output = F>
+ std::iter::Sum<F>
+ std::ops::Mul<Output = F>
+ Copy
+ PartialOrd,
{
let exceedances: Vec<F> = data
.iter()
.filter_map(|&x| if x > threshold { Some(x - threshold) } else { None })
.collect();
if exceedances.len() < 3 {
return Err(StatsError::InsufficientData(
"Need at least 3 exceedances for L-moment fitting".to_string(),
));
}
let n = exceedances.len();
let n_f = F::from(n).ok_or_else(|| {
StatsError::ComputationError("Failed to convert n to F".to_string())
})?;
let mut sorted = exceedances.clone();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let l1: F = sorted.iter().cloned().sum::<F>() / n_f;
let mut l2_sum = F::zero();
for (i, &x) in sorted.iter().enumerate() {
let two = F::from(2.0).unwrap_or(F::one() + F::one());
let one = F::one();
let pi = F::from(i + 1).ok_or_else(|| {
StatsError::ComputationError("Conversion error".to_string())
})?;
let weight = (two * pi - one) / n_f - one;
l2_sum = l2_sum + weight * x;
}
let l2 = l2_sum / n_f;
let two = F::from(2.0).unwrap_or(F::one() + F::one());
let xi_hat = two - l1 / l2;
let sigma_hat = two * l1 * l2 / (l1 - two * l2 + l2);
if sigma_hat <= F::zero() {
return Err(StatsError::ComputationError(
"L-moment fit produced non-positive scale".to_string(),
));
}
Ok((xi_hat, sigma_hat))
}
}
impl<F: Float + NumCast + std::fmt::Display> SampleableDistribution<F>
for GeneralizedPareto<F>
{
fn rvs(&self, size: usize) -> StatsResult<Vec<F>> {
use scirs2_core::random::SmallRng;
use scirs2_core::random::SeedableRng;
let mut rng = SmallRng::from_entropy();
let mut samples = Vec::with_capacity(size);
for _ in 0..size {
let u: f64 = self.rand_distr.sample(&mut rng);
let q = F::from(u).ok_or_else(|| {
StatsError::ComputationError("Failed to convert uniform sample".to_string())
})?;
samples.push(self.ppf(q)?);
}
Ok(samples)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_exponential_limit() {
let gpd = GeneralizedPareto::new(0.0f64, 1.0, 0.0).expect("valid params");
let cdf = gpd.cdf(1.0);
assert!((cdf - 0.6321205588).abs() < 1e-7, "cdf={}", cdf);
}
#[test]
fn test_pdf_non_negative() {
let gpd = GeneralizedPareto::new(0.5f64, 1.0, 0.0).expect("valid params");
for &x in &[0.0, 0.5, 1.0, 2.0, 5.0] {
assert!(gpd.pdf(x) >= 0.0);
}
assert_eq!(gpd.pdf(-0.1), 0.0);
}
#[test]
fn test_bounded_support_negative_xi() {
let gpd = GeneralizedPareto::new(-0.5f64, 1.0, 0.0).expect("valid params");
assert!((gpd.upper_bound().expect("should have upper bound") - 2.0).abs() < 1e-12);
assert_eq!(gpd.pdf(2.5), 0.0); assert!(gpd.pdf(1.0) > 0.0);
}
#[test]
fn test_cdf_ppf_roundtrip() {
let gpd = GeneralizedPareto::new(0.3f64, 2.0, 1.0).expect("valid params");
for &q in &[0.1, 0.3, 0.5, 0.7, 0.9] {
let x = gpd.ppf(q).expect("ppf should succeed");
let cdf_val = gpd.cdf(x);
assert!(
(cdf_val - q).abs() < 1e-9,
"q={} cdf(ppf(q))={} diff={}",
q,
cdf_val,
(cdf_val - q).abs()
);
}
}
#[test]
fn test_mean_variance() {
let gpd = GeneralizedPareto::new(0.0f64, 2.0, 0.0).expect("valid params");
assert!((gpd.mean().expect("mean exists") - 2.0).abs() < 1e-10);
assert!((gpd.variance().expect("variance exists") - 4.0).abs() < 1e-10);
}
}