scirs2_stats/distributions/
generalized_pareto.rs1use crate::error::{StatsError, StatsResult};
27use crate::sampling::SampleableDistribution;
28use scirs2_core::numeric::{Float, NumCast};
29use scirs2_core::random::prelude::*;
30use scirs2_core::random::rand_distributions::Distribution as _;
31use scirs2_core::random::Uniform as RandUniform;
32
33pub struct GeneralizedPareto<F: Float> {
44 pub xi: F,
46 pub sigma: F,
48 pub mu: F,
50 rand_distr: RandUniform<f64>,
51}
52
53impl<F: Float + NumCast + std::fmt::Display> GeneralizedPareto<F> {
54 pub fn new(xi: F, sigma: F, mu: F) -> StatsResult<Self> {
62 if sigma <= F::zero() {
63 return Err(StatsError::DomainError(
64 "Scale parameter sigma must be positive".to_string(),
65 ));
66 }
67 let rand_distr = RandUniform::new(0.0_f64, 1.0_f64).map_err(|_| {
68 StatsError::ComputationError(
69 "Failed to create uniform distribution for GPD sampling".to_string(),
70 )
71 })?;
72 Ok(Self {
73 xi,
74 sigma,
75 mu,
76 rand_distr,
77 })
78 }
79
80 pub fn upper_bound(&self) -> Option<F> {
84 if self.xi < F::zero() {
85 Some(self.mu - self.sigma / self.xi)
86 } else {
87 None
88 }
89 }
90
91 pub fn pdf(&self, x: F) -> F {
93 let z = (x - self.mu) / self.sigma;
94 if z < F::zero() {
95 return F::zero();
96 }
97 if let Some(ub) = self.upper_bound() {
98 if x > ub {
99 return F::zero();
100 }
101 }
102
103 let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
104 let abs_xi: f64 = xi_f64.abs();
105
106 if abs_xi < 1e-10 {
107 let exp_term = (-z).exp();
109 exp_term / self.sigma
110 } else {
111 let one = F::one();
112 let base = one + self.xi * z;
113 if base <= F::zero() {
114 return F::zero();
115 }
116 let exponent = -(one / self.xi + one);
117 base.powf(exponent) / self.sigma
118 }
119 }
120
121 pub fn cdf(&self, x: F) -> F {
123 let z = (x - self.mu) / self.sigma;
124 if z <= F::zero() {
125 return F::zero();
126 }
127 if let Some(ub) = self.upper_bound() {
128 if x >= ub {
129 return F::one();
130 }
131 }
132
133 let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
134 let abs_xi: f64 = xi_f64.abs();
135
136 if abs_xi < 1e-10 {
137 F::one() - (-z).exp()
139 } else {
140 let one = F::one();
141 let base = one + self.xi * z;
142 if base <= F::zero() {
143 return F::one();
144 }
145 let exponent = -one / self.xi;
146 F::one() - base.powf(exponent)
147 }
148 }
149
150 pub fn sf(&self, x: F) -> F {
152 F::one() - self.cdf(x)
153 }
154
155 pub fn ppf(&self, q: F) -> StatsResult<F> {
159 if q < F::zero() || q >= F::one() {
160 return Err(StatsError::DomainError(
161 "Quantile q must be in [0, 1)".to_string(),
162 ));
163 }
164
165 let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
166 let abs_xi: f64 = xi_f64.abs();
167
168 let x = if abs_xi < 1e-10 {
169 self.mu + self.sigma * (-(F::one() - q).ln())
171 } else {
172 let one = F::one();
174 let base = (one - q).powf(-self.xi) - one;
175 self.mu + self.sigma * base / self.xi
176 };
177
178 Ok(x)
179 }
180
181 pub fn logpdf(&self, x: F) -> F {
183 let pdf_val = self.pdf(x);
184 if pdf_val <= F::zero() {
185 F::from(f64::NEG_INFINITY).unwrap_or(F::zero())
186 } else {
187 pdf_val.ln()
188 }
189 }
190
191 pub fn mean(&self) -> Option<F> {
195 let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
196 if xi_f64 >= 1.0 {
197 None
198 } else {
199 let one = F::one();
200 Some(self.mu + self.sigma / (one - self.xi))
201 }
202 }
203
204 pub fn variance(&self) -> Option<F> {
208 let xi_f64: f64 = NumCast::from(self.xi).unwrap_or(0.0);
209 if xi_f64 >= 0.5 {
210 None
211 } else {
212 let one = F::one();
213 let two = F::from(2.0).unwrap_or(one + one);
214 let numer = self.sigma * self.sigma;
215 let denom = (one - self.xi) * (one - self.xi) * (one - two * self.xi);
216 Some(numer / denom)
217 }
218 }
219
220 pub fn rvs<R: Rng + ?Sized>(&self, n: usize, rng: &mut R) -> StatsResult<Vec<F>> {
222 let mut samples = Vec::with_capacity(n);
223 for _ in 0..n {
224 let u: f64 = self.rand_distr.sample(rng);
225 let q = F::from(u).ok_or_else(|| {
226 StatsError::ComputationError("Failed to convert uniform sample to F".to_string())
227 })?;
228 let x = self.ppf(q)?;
229 samples.push(x);
230 }
231 Ok(samples)
232 }
233
234 pub fn fit_lmoments(data: &[F], threshold: F) -> StatsResult<(F, F)>
238 where
239 F: std::ops::Sub<Output = F>
240 + std::iter::Sum<F>
241 + std::ops::Mul<Output = F>
242 + Copy
243 + PartialOrd,
244 {
245 let exceedances: Vec<F> = data
246 .iter()
247 .filter_map(|&x| {
248 if x > threshold {
249 Some(x - threshold)
250 } else {
251 None
252 }
253 })
254 .collect();
255
256 if exceedances.len() < 3 {
257 return Err(StatsError::InsufficientData(
258 "Need at least 3 exceedances for L-moment fitting".to_string(),
259 ));
260 }
261
262 let n = exceedances.len();
263 let n_f = F::from(n)
264 .ok_or_else(|| StatsError::ComputationError("Failed to convert n to F".to_string()))?;
265
266 let mut sorted = exceedances.clone();
268 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
269
270 let l1: F = sorted.iter().cloned().sum::<F>() / n_f;
271
272 let mut l2_sum = F::zero();
274 for (i, &x) in sorted.iter().enumerate() {
275 let two = F::from(2.0).unwrap_or(F::one() + F::one());
276 let one = F::one();
277 let pi = F::from(i + 1)
278 .ok_or_else(|| StatsError::ComputationError("Conversion error".to_string()))?;
279 let weight = (two * pi - one) / n_f - one;
280 l2_sum = l2_sum + weight * x;
281 }
282 let l2 = l2_sum / n_f;
283
284 let two = F::from(2.0).unwrap_or(F::one() + F::one());
286 let xi_hat = two - l1 / l2;
287 let sigma_hat = two * l1 * l2 / (l1 - two * l2 + l2);
288
289 if sigma_hat <= F::zero() {
290 return Err(StatsError::ComputationError(
291 "L-moment fit produced non-positive scale".to_string(),
292 ));
293 }
294
295 Ok((xi_hat, sigma_hat))
296 }
297}
298
299impl<F: Float + NumCast + std::fmt::Display> SampleableDistribution<F> for GeneralizedPareto<F> {
300 fn rvs(&self, size: usize) -> StatsResult<Vec<F>> {
301 use scirs2_core::random::rngs::SmallRng;
302 use scirs2_core::random::SeedableRng;
303 let seed = std::time::SystemTime::now()
304 .duration_since(std::time::UNIX_EPOCH)
305 .map(|d| d.as_nanos() as u64)
306 .unwrap_or(0x9e3779b97f4a7c15);
307 let mut rng = SmallRng::seed_from_u64(seed);
308 let mut samples = Vec::with_capacity(size);
309 for _ in 0..size {
310 let u: f64 = self.rand_distr.sample(&mut rng);
311 let q = F::from(u).ok_or_else(|| {
312 StatsError::ComputationError("Failed to convert uniform sample".to_string())
313 })?;
314 samples.push(self.ppf(q)?);
315 }
316 Ok(samples)
317 }
318}
319
320#[cfg(test)]
321mod tests {
322 use super::*;
323
324 #[test]
325 fn test_exponential_limit() {
326 let gpd = GeneralizedPareto::new(0.0f64, 1.0, 0.0).expect("valid params");
328 let cdf = gpd.cdf(1.0);
330 assert!((cdf - 0.6321205588).abs() < 1e-7, "cdf={}", cdf);
331 }
332
333 #[test]
334 fn test_pdf_non_negative() {
335 let gpd = GeneralizedPareto::new(0.5f64, 1.0, 0.0).expect("valid params");
336 for &x in &[0.0, 0.5, 1.0, 2.0, 5.0] {
337 assert!(gpd.pdf(x) >= 0.0);
338 }
339 assert_eq!(gpd.pdf(-0.1), 0.0);
341 }
342
343 #[test]
344 fn test_bounded_support_negative_xi() {
345 let gpd = GeneralizedPareto::new(-0.5f64, 1.0, 0.0).expect("valid params");
347 assert!((gpd.upper_bound().expect("should have upper bound") - 2.0).abs() < 1e-12);
348 assert_eq!(gpd.pdf(2.5), 0.0); assert!(gpd.pdf(1.0) > 0.0);
350 }
351
352 #[test]
353 fn test_cdf_ppf_roundtrip() {
354 let gpd = GeneralizedPareto::new(0.3f64, 2.0, 1.0).expect("valid params");
355 for &q in &[0.1, 0.3, 0.5, 0.7, 0.9] {
356 let x = gpd.ppf(q).expect("ppf should succeed");
357 let cdf_val = gpd.cdf(x);
358 assert!(
359 (cdf_val - q).abs() < 1e-9,
360 "q={} cdf(ppf(q))={} diff={}",
361 q,
362 cdf_val,
363 (cdf_val - q).abs()
364 );
365 }
366 }
367
368 #[test]
369 fn test_mean_variance() {
370 let gpd = GeneralizedPareto::new(0.0f64, 2.0, 0.0).expect("valid params");
371 assert!((gpd.mean().expect("mean exists") - 2.0).abs() < 1e-10);
373 assert!((gpd.variance().expect("variance exists") - 4.0).abs() < 1e-10);
374 }
375}