Skip to main content

scirs2_stats/distributions/
generalized_pareto.rs

1//! Generalized Pareto Distribution (GPD) for extreme value analysis
2//!
3//! The GPD is used to model the distribution of exceedances over a threshold
4//! in Extreme Value Theory (EVT). It arises as the limiting distribution for
5//! threshold exceedances from a broad class of distributions via the
6//! Pickands-Balkema-de Haan theorem.
7//!
8//! # Parameterisation
9//!
10//! For threshold `u`, shape `ξ` (xi), and scale `σ > 0`:
11//!
12//! ```text
13//! F(x) = 1 - (1 + ξ·(x-u)/σ)^{-1/ξ}   if ξ ≠ 0
14//! F(x) = 1 - exp(-(x-u)/σ)               if ξ = 0 (exponential)
15//! ```
16//!
17//! Support:
18//!   - `x ≥ u`                      for ξ ≥ 0
19//!   - `u ≤ x ≤ u - σ/ξ`           for ξ < 0
20//!
21//! Special cases:
22//! - ξ = 0 → Exponential distribution
23//! - ξ = 1 → Uniform distribution (up to reparameterisation)
24//! - ξ → ∞ → heavy-tailed (Pareto-like)
25
26use crate::error::{StatsError, StatsResult};
27use crate::sampling::SampleableDistribution;
28use scirs2_core::numeric::{Float, NumCast};
29use scirs2_core::random::prelude::*;
30use scirs2_core::random::rand_distributions::Distribution as _;
31use scirs2_core::random::Uniform as RandUniform;
32
33/// Generalized Pareto Distribution
34///
35/// # Examples
36///
37/// ```
38/// use scirs2_stats::distributions::generalized_pareto::GeneralizedPareto;
39///
40/// let gpd = GeneralizedPareto::new(0.5f64, 1.0, 0.0).expect("valid parameters");
41/// assert!(gpd.pdf(1.0) > 0.0);
42/// ```
43pub struct GeneralizedPareto<F: Float> {
44    /// Shape parameter ξ (xi); negative ξ gives bounded support
45    pub xi: F,
46    /// Scale parameter σ > 0
47    pub sigma: F,
48    /// Location (threshold) parameter u
49    pub mu: F,
50    rand_distr: RandUniform<f64>,
51}
52
53impl<F: Float + NumCast + std::fmt::Display> GeneralizedPareto<F> {
54    /// Create a new Generalized Pareto distribution.
55    ///
56    /// # Arguments
57    ///
58    /// * `xi` - Shape parameter ξ (any real number)
59    /// * `sigma` - Scale parameter σ (must be > 0)
60    /// * `mu` - Location (threshold) parameter u
61    pub fn new(xi: F, sigma: F, mu: F) -> StatsResult<Self> {
62        if sigma <= F::zero() {
63            return Err(StatsError::DomainError(
64                "Scale parameter sigma must be positive".to_string(),
65            ));
66        }
67        let rand_distr = RandUniform::new(0.0_f64, 1.0_f64).map_err(|_| {
68            StatsError::ComputationError(
69                "Failed to create uniform distribution for GPD sampling".to_string(),
70            )
71        })?;
72        Ok(Self {
73            xi,
74            sigma,
75            mu,
76            rand_distr,
77        })
78    }
79
80    /// Upper bound of the support when ξ < 0 (`mu - sigma/xi`).
81    ///
82    /// Returns `None` when ξ ≥ 0 (unbounded support).
83    pub fn upper_bound(&self) -> Option<F> {
84        if self.xi < F::zero() {
85            Some(self.mu - self.sigma / self.xi)
86        } else {
87            None
88        }
89    }
90
91    /// Probability density function.
92    pub fn pdf(&self, x: F) -> F {
93        let z = (x - self.mu) / self.sigma;
94        if z < F::zero() {
95            return F::zero();
96        }
97        if let Some(ub) = self.upper_bound() {
98            if x > ub {
99                return F::zero();
100            }
101        }
102
103        let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
104        let abs_xi: f64 = xi_f64.abs();
105
106        if abs_xi < 1e-10 {
107            // ξ → 0: Exponential limit
108            let exp_term = (-z).exp();
109            exp_term / self.sigma
110        } else {
111            let one = F::one();
112            let base = one + self.xi * z;
113            if base <= F::zero() {
114                return F::zero();
115            }
116            let exponent = -(one / self.xi + one);
117            base.powf(exponent) / self.sigma
118        }
119    }
120
121    /// Cumulative distribution function.
122    pub fn cdf(&self, x: F) -> F {
123        let z = (x - self.mu) / self.sigma;
124        if z <= F::zero() {
125            return F::zero();
126        }
127        if let Some(ub) = self.upper_bound() {
128            if x >= ub {
129                return F::one();
130            }
131        }
132
133        let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
134        let abs_xi: f64 = xi_f64.abs();
135
136        if abs_xi < 1e-10 {
137            // ξ → 0: Exponential
138            F::one() - (-z).exp()
139        } else {
140            let one = F::one();
141            let base = one + self.xi * z;
142            if base <= F::zero() {
143                return F::one();
144            }
145            let exponent = -one / self.xi;
146            F::one() - base.powf(exponent)
147        }
148    }
149
150    /// Survival function (1 - CDF).
151    pub fn sf(&self, x: F) -> F {
152        F::one() - self.cdf(x)
153    }
154
155    /// Percent point function (inverse CDF / quantile function).
156    ///
157    /// Returns `Err` if `q` is not in [0, 1).
158    pub fn ppf(&self, q: F) -> StatsResult<F> {
159        if q < F::zero() || q >= F::one() {
160            return Err(StatsError::DomainError(
161                "Quantile q must be in [0, 1)".to_string(),
162            ));
163        }
164
165        let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
166        let abs_xi: f64 = xi_f64.abs();
167
168        let x = if abs_xi < 1e-10 {
169            // Exponential quantile
170            self.mu + self.sigma * (-(F::one() - q).ln())
171        } else {
172            // General GPD quantile
173            let one = F::one();
174            let base = (one - q).powf(-self.xi) - one;
175            self.mu + self.sigma * base / self.xi
176        };
177
178        Ok(x)
179    }
180
181    /// Log probability density function.
182    pub fn logpdf(&self, x: F) -> F {
183        let pdf_val = self.pdf(x);
184        if pdf_val <= F::zero() {
185            F::from(f64::NEG_INFINITY).unwrap_or(F::zero())
186        } else {
187            pdf_val.ln()
188        }
189    }
190
191    /// Mean of the distribution.
192    ///
193    /// Exists only when ξ < 1. Returns `None` otherwise.
194    pub fn mean(&self) -> Option<F> {
195        let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
196        if xi_f64 >= 1.0 {
197            None
198        } else {
199            let one = F::one();
200            Some(self.mu + self.sigma / (one - self.xi))
201        }
202    }
203
204    /// Variance of the distribution.
205    ///
206    /// Exists only when ξ < 0.5. Returns `None` otherwise.
207    pub fn variance(&self) -> Option<F> {
208        let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
209        if xi_f64 >= 0.5 {
210            None
211        } else {
212            let one = F::one();
213            let two = F::from(2.0).unwrap_or(one + one);
214            let numer = self.sigma * self.sigma;
215            let denom = (one - self.xi) * (one - self.xi) * (one - two * self.xi);
216            Some(numer / denom)
217        }
218    }
219
220    /// Generate random variates from the GPD using the quantile transform.
221    pub fn rvs<R: Rng + ?Sized>(&self, n: usize, rng: &mut R) -> StatsResult<Vec<F>> {
222        let mut samples = Vec::with_capacity(n);
223        for _ in 0..n {
224            let u: f64 = self.rand_distr.sample(rng);
225            let q = F::from(u).ok_or_else(|| {
226                StatsError::ComputationError("Failed to convert uniform sample to F".to_string())
227            })?;
228            let x = self.ppf(q)?;
229            samples.push(x);
230        }
231        Ok(samples)
232    }
233
234    /// Fit GPD parameters to data exceeding a threshold using MLE.
235    ///
236    /// Returns `(xi_hat, sigma_hat)` using the method of L-moments.
237    pub fn fit_lmoments(data: &[F], threshold: F) -> StatsResult<(F, F)>
238    where
239        F: std::ops::Sub<Output = F>
240            + std::iter::Sum<F>
241            + std::ops::Mul<Output = F>
242            + Copy
243            + PartialOrd,
244    {
245        let exceedances: Vec<F> = data
246            .iter()
247            .filter_map(|&x| {
248                if x > threshold {
249                    Some(x - threshold)
250                } else {
251                    None
252                }
253            })
254            .collect();
255
256        if exceedances.len() < 3 {
257            return Err(StatsError::InsufficientData(
258                "Need at least 3 exceedances for L-moment fitting".to_string(),
259            ));
260        }
261
262        let n = exceedances.len();
263        let n_f = F::from(n)
264            .ok_or_else(|| StatsError::ComputationError("Failed to convert n to F".to_string()))?;
265
266        // L1 (mean of sorted exceedances)
267        let mut sorted = exceedances.clone();
268        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
269
270        let l1: F = sorted.iter().cloned().sum::<F>() / n_f;
271
272        // L2 using plotting positions
273        let mut l2_sum = F::zero();
274        for (i, &x) in sorted.iter().enumerate() {
275            let two = F::from(2.0).unwrap_or(F::one() + F::one());
276            let one = F::one();
277            let pi = F::from(i + 1)
278                .ok_or_else(|| StatsError::ComputationError("Conversion error".to_string()))?;
279            let weight = (two * pi - one) / n_f - one;
280            l2_sum = l2_sum + weight * x;
281        }
282        let l2 = l2_sum / n_f;
283
284        // Hosking-Wallis L-moment estimators
285        let two = F::from(2.0).unwrap_or(F::one() + F::one());
286        let xi_hat = two - l1 / l2;
287        let sigma_hat = two * l1 * l2 / (l1 - two * l2 + l2);
288
289        if sigma_hat <= F::zero() {
290            return Err(StatsError::ComputationError(
291                "L-moment fit produced non-positive scale".to_string(),
292            ));
293        }
294
295        Ok((xi_hat, sigma_hat))
296    }
297}
298
299impl<F: Float + NumCast + std::fmt::Display> SampleableDistribution<F> for GeneralizedPareto<F> {
300    fn rvs(&self, size: usize) -> StatsResult<Vec<F>> {
301        use scirs2_core::random::rngs::SmallRng;
302        use scirs2_core::random::SeedableRng;
303        let seed = std::time::SystemTime::now()
304            .duration_since(std::time::UNIX_EPOCH)
305            .map(|d| d.as_nanos() as u64)
306            .unwrap_or(0x9e3779b97f4a7c15);
307        let mut rng = SmallRng::seed_from_u64(seed);
308        let mut samples = Vec::with_capacity(size);
309        for _ in 0..size {
310            let u: f64 = self.rand_distr.sample(&mut rng);
311            let q = F::from(u).ok_or_else(|| {
312                StatsError::ComputationError("Failed to convert uniform sample".to_string())
313            })?;
314            samples.push(self.ppf(q)?);
315        }
316        Ok(samples)
317    }
318}
319
320#[cfg(test)]
321mod tests {
322    use super::*;
323
324    #[test]
325    fn test_exponential_limit() {
326        // ξ = 0 → exponential with rate 1/σ
327        let gpd = GeneralizedPareto::new(0.0f64, 1.0, 0.0).expect("valid params");
328        // CDF at x=1 for Exp(1): 1 - e^{-1} ≈ 0.6321
329        let cdf = gpd.cdf(1.0);
330        assert!((cdf - 0.6321205588).abs() < 1e-7, "cdf={}", cdf);
331    }
332
333    #[test]
334    fn test_pdf_non_negative() {
335        let gpd = GeneralizedPareto::new(0.5f64, 1.0, 0.0).expect("valid params");
336        for &x in &[0.0, 0.5, 1.0, 2.0, 5.0] {
337            assert!(gpd.pdf(x) >= 0.0);
338        }
339        // Support starts at mu=0
340        assert_eq!(gpd.pdf(-0.1), 0.0);
341    }
342
343    #[test]
344    fn test_bounded_support_negative_xi() {
345        // ξ = -0.5, σ = 1, u = 0 → upper bound = 0 - 1/(-0.5) = 2
346        let gpd = GeneralizedPareto::new(-0.5f64, 1.0, 0.0).expect("valid params");
347        assert!((gpd.upper_bound().expect("should have upper bound") - 2.0).abs() < 1e-12);
348        assert_eq!(gpd.pdf(2.5), 0.0); // Outside support
349        assert!(gpd.pdf(1.0) > 0.0);
350    }
351
352    #[test]
353    fn test_cdf_ppf_roundtrip() {
354        let gpd = GeneralizedPareto::new(0.3f64, 2.0, 1.0).expect("valid params");
355        for &q in &[0.1, 0.3, 0.5, 0.7, 0.9] {
356            let x = gpd.ppf(q).expect("ppf should succeed");
357            let cdf_val = gpd.cdf(x);
358            assert!(
359                (cdf_val - q).abs() < 1e-9,
360                "q={} cdf(ppf(q))={} diff={}",
361                q,
362                cdf_val,
363                (cdf_val - q).abs()
364            );
365        }
366    }
367
368    #[test]
369    fn test_mean_variance() {
370        let gpd = GeneralizedPareto::new(0.0f64, 2.0, 0.0).expect("valid params");
371        // ξ=0 (exponential): mean=σ=2, var=σ²=4
372        assert!((gpd.mean().expect("mean exists") - 2.0).abs() < 1e-10);
373        assert!((gpd.variance().expect("variance exists") - 4.0).abs() < 1e-10);
374    }
375}