scirs2_stats/extreme_value/
analysis.rs

1//! EVT analysis tools: block maxima, Peaks Over Threshold (POT), return levels,
2//! Hill/Pickands estimators, mean excess plots, and empirical return periods.
3//!
4//! # Key Functions
5//! - [`block_maxima_analysis`]: Fit a GEV to block maxima and compute return levels.
6//! - [`peaks_over_threshold`]: POT analysis – extract exceedances, fit GPD, compute return levels.
7//! - [`mean_excess_plot`]: Mean Residual Life plot to help choose the POT threshold.
8//! - [`hill_estimator`]: Semi-parametric tail index estimator for heavy-tailed data.
9//! - [`pickands_estimator`]: Pickands tail index estimator.
10//! - [`return_level_confidence`]: Delta-method confidence interval for GEV return levels.
11//! - [`empirical_return_periods`]: Empirical return periods using various plotting positions.
12//!
13//! # References
14//! - Coles, S. (2001). *An Introduction to Statistical Modeling of Extreme Values*. Springer.
15//! - Hill, B.M. (1975). A simple general approach to inference about the tail. *Ann. Statist.*
16//! - Pickands, J. (1975). Statistical inference using extreme order statistics. *Ann. Statist.*
17
18use crate::error::StatsError;
19use scirs2_core::ndarray::{Array1, ArrayView1};
20
21use super::distributions::{GeneralizedExtremeValue, GeneralizedPareto};
22use super::estimation::{gev_fit_lmoments, gev_fit_mle, gev_fit_pwm, gpd_fit_mle};
23
24// ---------------------------------------------------------------------------
25// Shared enumerations
26// ---------------------------------------------------------------------------
27
28/// Method used to estimate distribution parameters in EVT analyses.
29#[derive(Debug, Clone, PartialEq)]
30pub enum EvtEstimationMethod {
31    /// Maximum Likelihood Estimation (Nelder–Mead optimizer).
32    MLE,
33    /// Probability-Weighted Moments (Hosking et al. 1985).
34    PWM,
35    /// L-moments (Hosking 1990) – generally recommended for small samples.
36    LMoments,
37}
38
39/// Plotting position formula for empirical return period estimation.
40#[derive(Debug, Clone, PartialEq)]
41pub enum PlottingPosition {
42    /// Weibull: F̂ᵢ = i / (n+1)  — unbiased for uniform distribution.
43    Weibull,
44    /// Gringorten: F̂ᵢ = (i − 0.44) / (n + 0.12)  — recommended for Gumbel.
45    Gringorten,
46    /// Blom: F̂ᵢ = (i − 0.375) / (n + 0.25)  — approximately unbiased normal quantiles.
47    Blom,
48    /// Hazen: F̂ᵢ = (i − 0.5) / n  — midpoint formula.
49    Hazen,
50}
51
52// ---------------------------------------------------------------------------
53// Block maxima
54// ---------------------------------------------------------------------------
55
56/// Results from a block maxima EVT analysis.
57#[derive(Debug, Clone)]
58pub struct BlockMaximaResult {
59    /// The extracted block maxima (one per block).
60    pub block_maxima: Array1<f64>,
61    /// Fitted GEV parameters.
62    pub gev_params: GeneralizedExtremeValue,
63    /// Return levels: list of `(return_period, level)` pairs.
64    pub return_levels: Vec<(f64, f64)>,
65    /// Number of complete blocks extracted from data.
66    pub n_blocks: usize,
67}
68
69/// Block maxima method for EVT analysis.
70///
71/// The data series is divided into non-overlapping blocks of `block_size` observations.
72/// The maximum of each **complete** block is extracted and a GEV distribution is fitted
73/// to these block maxima.
74///
75/// # Arguments
76/// - `data`: input time series.
77/// - `block_size`: number of observations per block (e.g. 365 for annual maxima of daily data).
78/// - `return_periods`: slice of desired return periods (must all be > 1.0).
79/// - `estimation`: parameter estimation method.
80///
81/// # Errors
82/// - [`StatsError::InsufficientData`] if fewer than 2 complete blocks.
83/// - [`StatsError::InvalidArgument`] if `block_size == 0` or any return period ≤ 1.
84pub fn block_maxima_analysis(
85    data: ArrayView1<f64>,
86    block_size: usize,
87    return_periods: &[f64],
88    estimation: EvtEstimationMethod,
89) -> Result<BlockMaximaResult, StatsError> {
90    if block_size == 0 {
91        return Err(StatsError::InvalidArgument(
92            "block_size must be >= 1".into(),
93        ));
94    }
95    for &rp in return_periods {
96        if rp <= 1.0 {
97            return Err(StatsError::InvalidArgument(format!(
98                "All return periods must be > 1.0, got {rp}"
99            )));
100        }
101    }
102
103    let n = data.len();
104    let n_blocks = n / block_size;
105
106    if n_blocks < 2 {
107        return Err(StatsError::InsufficientData(format!(
108            "Need at least 2 complete blocks (block_size={block_size}), \
109             but data length {n} gives only {n_blocks} block(s)"
110        )));
111    }
112
113    // Extract block maxima
114    let mut maxima = Vec::with_capacity(n_blocks);
115    let data_slice: Vec<f64> = data.iter().copied().collect();
116    for b in 0..n_blocks {
117        let start = b * block_size;
118        let end = start + block_size;
119        let block_max = data_slice[start..end]
120            .iter()
121            .copied()
122            .fold(f64::NEG_INFINITY, f64::max);
123        if block_max.is_finite() {
124            maxima.push(block_max);
125        }
126    }
127
128    if maxima.len() < 2 {
129        return Err(StatsError::InsufficientData(
130            "Could not extract at least 2 finite block maxima".into(),
131        ));
132    }
133
134    let maxima_arr = Array1::from(maxima.clone());
135
136    // Fit GEV to block maxima
137    let gev_params = match estimation {
138        EvtEstimationMethod::MLE => gev_fit_mle(maxima_arr.view())?,
139        EvtEstimationMethod::PWM => gev_fit_pwm(maxima_arr.view())?,
140        EvtEstimationMethod::LMoments => gev_fit_lmoments(maxima_arr.view())?,
141    };
142
143    // Compute return levels
144    let return_levels: Vec<(f64, f64)> = return_periods
145        .iter()
146        .map(|&rp| {
147            let level = gev_params.return_level(rp).unwrap_or(f64::NAN);
148            (rp, level)
149        })
150        .collect();
151
152    Ok(BlockMaximaResult {
153        block_maxima: maxima_arr,
154        gev_params,
155        return_levels,
156        n_blocks,
157    })
158}
159
160// ---------------------------------------------------------------------------
161// Peaks Over Threshold (POT)
162// ---------------------------------------------------------------------------
163
164/// Results from a Peaks Over Threshold (POT) analysis.
165#[derive(Debug, Clone)]
166pub struct PotResult {
167    /// The threshold used.
168    pub threshold: f64,
169    /// Exceedances above the threshold (already subtracted: x − threshold).
170    pub exceedances: Array1<f64>,
171    /// Number of exceedances.
172    pub n_exceedances: usize,
173    /// Exceedance rate λ = n_exceedances / n_total.
174    pub rate: f64,
175    /// Fitted GPD parameters (with mu = 0, since exceedances are already shifted).
176    pub gpd_params: GeneralizedPareto,
177    /// Return levels: `(return_period, level)` in the original scale.
178    pub return_levels: Vec<(f64, f64)>,
179}
180
181/// Peaks Over Threshold (POT) analysis.
182///
183/// Identifies values in `data` that exceed `threshold`, fits a GPD to the exceedances,
184/// and computes return levels.
185///
186/// For a return period T (in the same units as the data time steps), the return level is:
187///
188/// x_T = threshold + (σ/ξ) \[(λT)^ξ − 1\]  for ξ ≠ 0
189/// x_T = threshold + σ ln(λT)               for ξ = 0
190///
191/// where λ is the exceedance rate.
192///
193/// # Errors
194/// - [`StatsError::InsufficientData`] if fewer than 5 exceedances above the threshold.
195/// - [`StatsError::InvalidArgument`] if any return period ≤ 1.
196pub fn peaks_over_threshold(
197    data: ArrayView1<f64>,
198    threshold: f64,
199    return_periods: &[f64],
200) -> Result<PotResult, StatsError> {
201    for &rp in return_periods {
202        if rp <= 1.0 {
203            return Err(StatsError::InvalidArgument(format!(
204                "All return periods must be > 1.0, got {rp}"
205            )));
206        }
207    }
208
209    let n_total = data.len();
210    if n_total == 0 {
211        return Err(StatsError::InsufficientData("Data is empty".into()));
212    }
213
214    // Extract exceedances (x − threshold for x > threshold)
215    let exceedances: Vec<f64> = data
216        .iter()
217        .filter_map(|&x| {
218            if x > threshold {
219                Some(x - threshold)
220            } else {
221                None
222            }
223        })
224        .collect();
225
226    let n_exc = exceedances.len();
227    if n_exc < 5 {
228        return Err(StatsError::InsufficientData(format!(
229            "POT requires at least 5 exceedances above threshold {threshold}; got {n_exc}"
230        )));
231    }
232
233    let rate = n_exc as f64 / n_total as f64;
234    let exc_arr = Array1::from(exceedances);
235
236    // Fit GPD to exceedances
237    let gpd_params = gpd_fit_mle(exc_arr.view())?;
238
239    // Return levels in original scale
240    let sigma = gpd_params.sigma;
241    let xi = gpd_params.xi;
242    const XI_TOL: f64 = 1e-10;
243
244    let return_levels: Vec<(f64, f64)> = return_periods
245        .iter()
246        .map(|&rp| {
247            let lambda_t = rate * rp;
248            let level = if xi.abs() < XI_TOL {
249                threshold + sigma * lambda_t.ln()
250            } else {
251                threshold + (sigma / xi) * (lambda_t.powf(xi) - 1.0)
252            };
253            (rp, level)
254        })
255        .collect();
256
257    Ok(PotResult {
258        threshold,
259        exceedances: exc_arr,
260        n_exceedances: n_exc,
261        rate,
262        gpd_params,
263        return_levels,
264    })
265}
266
267// ---------------------------------------------------------------------------
268// Mean Excess / Mean Residual Life plot
269// ---------------------------------------------------------------------------
270
271/// Compute the Mean Excess (Mean Residual Life) function over a range of thresholds.
272///
273/// For each threshold u, the mean excess is E[X − u | X > u].  If the data follow a GPD,
274/// this is a linear function of u (slope = ξ/(1−ξ), intercept = σ/(1−ξ)).
275///
276/// Useful for threshold selection in POT analysis.
277///
278/// # Arguments
279/// - `data`: observed values.
280/// - `n_thresholds`: number of evenly-spaced threshold values to evaluate (default 50).
281///
282/// # Returns
283/// `(thresholds, mean_excess)` — two arrays of the same length.
284///
285/// # Errors
286/// - [`StatsError::InsufficientData`] if fewer than 10 observations.
287pub fn mean_excess_plot(
288    data: ArrayView1<f64>,
289    n_thresholds: usize,
290) -> Result<(Array1<f64>, Array1<f64>), StatsError> {
291    let n = data.len();
292    if n < 10 {
293        return Err(StatsError::InsufficientData(
294            "Mean excess plot requires at least 10 observations".into(),
295        ));
296    }
297    if n_thresholds == 0 {
298        return Err(StatsError::InvalidArgument(
299            "n_thresholds must be >= 1".into(),
300        ));
301    }
302
303    let mut sorted: Vec<f64> = data.iter().copied().collect();
304    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
305
306    // Use the 5th and 95th percentile as threshold range (ensure ≥ 5 exceedances)
307    let lo_idx = n / 20; // ~5th percentile
308    let hi_idx = (9 * n) / 10; // ~90th percentile
309    let lo = sorted[lo_idx];
310    let hi = sorted[hi_idx];
311
312    if hi <= lo {
313        return Err(StatsError::ComputationError(
314            "Mean excess plot: data range too narrow".into(),
315        ));
316    }
317
318    let step = (hi - lo) / n_thresholds as f64;
319    let mut thresholds = Vec::with_capacity(n_thresholds);
320    let mut mean_excess = Vec::with_capacity(n_thresholds);
321
322    for k in 0..n_thresholds {
323        let u = lo + k as f64 * step;
324        let exceedances: Vec<f64> = sorted.iter().filter(|&&x| x > u).map(|&x| x - u).collect();
325        if exceedances.len() < 2 {
326            break;
327        }
328        let me = exceedances.iter().sum::<f64>() / exceedances.len() as f64;
329        thresholds.push(u);
330        mean_excess.push(me);
331    }
332
333    if thresholds.is_empty() {
334        return Err(StatsError::ComputationError(
335            "Mean excess plot: no valid threshold produced enough exceedances".into(),
336        ));
337    }
338
339    Ok((Array1::from(thresholds), Array1::from(mean_excess)))
340}
341
342// ---------------------------------------------------------------------------
343// Hill estimator
344// ---------------------------------------------------------------------------
345
346/// Hill estimator of the tail index ξ for heavy-tailed distributions.
347///
348/// Requires ξ > 0 (Pareto / Fréchet tail behaviour).
349/// Uses the top-k order statistics.
350///
351/// ξ̂_Hill(k) = (1/k) Σ_{i=1}^{k} log(X_{n−i+1:n}) − log(X_{n−k:n})
352///
353/// # Arguments
354/// - `data`: observed values (need not be sorted).
355/// - `k`: number of upper order statistics to use (1 ≤ k ≤ n−1).
356///
357/// # Errors
358/// - [`StatsError::InvalidArgument`] if `k` is 0 or ≥ n.
359/// - [`StatsError::InsufficientData`] if `n < 2`.
360pub fn hill_estimator(data: ArrayView1<f64>, k: usize) -> Result<f64, StatsError> {
361    let n = data.len();
362    if n < 2 {
363        return Err(StatsError::InsufficientData(
364            "Hill estimator requires at least 2 observations".into(),
365        ));
366    }
367    if k == 0 || k >= n {
368        return Err(StatsError::InvalidArgument(format!(
369            "k must be in [1, n-1] = [1, {}], got {k}",
370            n - 1
371        )));
372    }
373
374    let mut sorted: Vec<f64> = data.iter().copied().collect();
375    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
376
377    // X_{n-k:n} is the (k+1)-th largest value
378    let x_threshold = sorted[n - k - 1];
379    if x_threshold <= 0.0 {
380        return Err(StatsError::InvalidArgument(
381            "Hill estimator requires all data values used to be positive".into(),
382        ));
383    }
384
385    let log_threshold = x_threshold.ln();
386    let mut sum = 0.0_f64;
387    for i in (n - k)..n {
388        if sorted[i] <= 0.0 {
389            return Err(StatsError::InvalidArgument(
390                "Hill estimator: encountered non-positive order statistic".into(),
391            ));
392        }
393        sum += sorted[i].ln() - log_threshold;
394    }
395
396    Ok(sum / k as f64)
397}
398
399// ---------------------------------------------------------------------------
400// Pickands estimator
401// ---------------------------------------------------------------------------
402
403/// Pickands estimator of the tail index ξ.
404///
405/// Uses three order statistics symmetrically placed in the upper tail:
406///
407/// ξ̂_Pickands(k) = (1/ln2) * ln\[(X_{n−k+1:n} − X_{n−2k+1:n}) / (X_{n−2k+1:n} − X_{n−4k+1:n})\]
408///
409/// # Arguments
410/// - `data`: observed values.
411/// - `k`: tail parameter; requires 4k < n.
412///
413/// # Errors
414/// - [`StatsError::InvalidArgument`] if `4k >= n` or `k == 0`.
415pub fn pickands_estimator(data: ArrayView1<f64>, k: usize) -> Result<f64, StatsError> {
416    let n = data.len();
417    if k == 0 {
418        return Err(StatsError::InvalidArgument("k must be >= 1".into()));
419    }
420    if 4 * k >= n {
421        return Err(StatsError::InvalidArgument(format!(
422            "Pickands estimator requires 4k < n; got 4*{k}={} >= n={n}",
423            4 * k
424        )));
425    }
426
427    let mut sorted: Vec<f64> = data.iter().copied().collect();
428    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
429
430    // Order statistics (1-indexed from the top):
431    // X_{n−k+1:n}   = sorted[n - k]
432    // X_{n−2k+1:n}  = sorted[n - 2*k]
433    // X_{n−4k+1:n}  = sorted[n - 4*k]
434    let x1 = sorted[n - k];
435    let x2 = sorted[n - 2 * k];
436    let x3 = sorted[n - 4 * k];
437
438    let num = x1 - x2;
439    let den = x2 - x3;
440
441    if den.abs() < 1e-15 {
442        return Err(StatsError::ComputationError(
443            "Pickands estimator: degenerate order statistics (denominator ≈ 0)".into(),
444        ));
445    }
446    if num / den <= 0.0 {
447        return Err(StatsError::ComputationError(
448            "Pickands estimator: invalid ratio (non-positive)".into(),
449        ));
450    }
451
452    let xi = (num / den).ln() / 2.0_f64.ln();
453    Ok(xi)
454}
455
456// ---------------------------------------------------------------------------
457// Return level with confidence interval (delta method)
458// ---------------------------------------------------------------------------
459
460/// Compute a GEV return level with a delta-method confidence interval.
461///
462/// Uses the Fisher information matrix approximation for the GEV to derive the
463/// asymptotic variance of the return level estimator.
464///
465/// # Arguments
466/// - `params`: fitted GEV parameters.
467/// - `return_period`: return period T.
468/// - `n_data`: number of block maxima used to fit the GEV.
469/// - `alpha`: significance level (e.g. 0.05 for 95% CI).
470///
471/// # Returns
472/// `(lower, estimate, upper)`.
473///
474/// # Errors
475/// - [`StatsError::InvalidArgument`] if return_period ≤ 1 or alpha not in (0,0.5).
476pub fn return_level_confidence(
477    params: &GeneralizedExtremeValue,
478    return_period: f64,
479    n_data: usize,
480    alpha: f64,
481) -> Result<(f64, f64, f64), StatsError> {
482    if return_period <= 1.0 {
483        return Err(StatsError::InvalidArgument(format!(
484            "return_period must be > 1, got {return_period}"
485        )));
486    }
487    if !(0.0 < alpha && alpha < 0.5) {
488        return Err(StatsError::InvalidArgument(format!(
489            "alpha must be in (0, 0.5), got {alpha}"
490        )));
491    }
492    if n_data < 3 {
493        return Err(StatsError::InsufficientData(
494            "At least 3 observations needed for confidence interval".into(),
495        ));
496    }
497
498    let estimate = params.return_level(return_period)?;
499
500    // Delta method: approximate variance via numerical differentiation of return level
501    // w.r.t. (μ, σ, ξ).
502    let h = 1e-5;
503    let mu = params.mu;
504    let sigma = params.sigma;
505    let xi = params.xi;
506
507    let rl = |mu2: f64, sig2: f64, xi2: f64| -> f64 {
508        GeneralizedExtremeValue::new(mu2, sig2, xi2)
509            .ok()
510            .and_then(|g| g.return_level(return_period).ok())
511            .unwrap_or(f64::NAN)
512    };
513
514    let d_mu = (rl(mu + h, sigma, xi) - rl(mu - h, sigma, xi)) / (2.0 * h);
515    let d_sigma = (rl(mu, sigma + h, xi) - rl(mu, sigma - h, xi)) / (2.0 * h);
516    let d_xi = (rl(mu, sigma, xi + h) - rl(mu, sigma, xi - h)) / (2.0 * h);
517
518    if !d_mu.is_finite() || !d_sigma.is_finite() || !d_xi.is_finite() {
519        return Err(StatsError::ComputationError(
520            "Delta method: gradient computation failed at these parameters".into(),
521        ));
522    }
523
524    // Approximate variance of the information matrix (diagonal approximation):
525    // Var(μ̂) ≈ σ²/n,  Var(σ̂) ≈ σ²/(2n),  Var(ξ̂) ≈ (1+ξ)²/n
526    // These are rough order-of-magnitude estimates; the off-diagonal terms are ignored here.
527    let nf = n_data as f64;
528    let var_mu = sigma.powi(2) / nf;
529    let var_sigma = sigma.powi(2) / (2.0 * nf);
530    let var_xi = (1.0 + xi).powi(2) / nf;
531
532    let var_rl = d_mu.powi(2) * var_mu + d_sigma.powi(2) * var_sigma + d_xi.powi(2) * var_xi;
533
534    if var_rl < 0.0 || !var_rl.is_finite() {
535        return Err(StatsError::ComputationError(
536            "Delta method: negative or invalid variance estimate".into(),
537        ));
538    }
539
540    // z_{1-α/2} from normal distribution (pre-computed for common alpha values)
541    let z = normal_quantile(1.0 - alpha / 2.0);
542    let half_width = z * var_rl.sqrt();
543
544    Ok((estimate - half_width, estimate, estimate + half_width))
545}
546
547/// Approximate normal quantile (Beasley–Springer–Moro algorithm).
548fn normal_quantile(p: f64) -> f64 {
549    // Rational approximation from Abramowitz & Stegun 26.2.17
550    let p = p.clamp(1e-15, 1.0 - 1e-15);
551    let q = if p < 0.5 { p } else { 1.0 - p };
552    let t = (-2.0 * q.ln()).sqrt();
553    const C: [f64; 3] = [2.515517, 0.802853, 0.010328];
554    const D: [f64; 3] = [1.432788, 0.189269, 0.001308];
555    let num = C[0] + C[1] * t + C[2] * t.powi(2);
556    let den = 1.0 + D[0] * t + D[1] * t.powi(2) + D[2] * t.powi(3);
557    let z = t - num / den;
558    if p >= 0.5 {
559        z
560    } else {
561        -z
562    }
563}
564
565// ---------------------------------------------------------------------------
566// Empirical return periods
567// ---------------------------------------------------------------------------
568
569/// Compute empirical return periods for sorted data using various plotting position formulas.
570///
571/// Returns `(sorted_data, return_periods)` — the data sorted in ascending order and the
572/// corresponding empirical return periods T = 1 / (1 − F̂).
573///
574/// # Arguments
575/// - `data`: observed sample.
576/// - `plotting_position`: plotting position formula.
577///
578/// # Errors
579/// - [`StatsError::InsufficientData`] if `data` is empty.
580pub fn empirical_return_periods(
581    data: ArrayView1<f64>,
582    plotting_position: PlottingPosition,
583) -> Result<(Array1<f64>, Array1<f64>), StatsError> {
584    let n = data.len();
585    if n == 0 {
586        return Err(StatsError::InsufficientData(
587            "Data must not be empty".into(),
588        ));
589    }
590
591    let mut sorted: Vec<f64> = data.iter().copied().collect();
592    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
593    let nf = n as f64;
594
595    let return_periods: Vec<f64> = (1..=n)
596        .map(|i| {
597            let if64 = i as f64;
598            // Empirical exceedance probability (using plotting position)
599            let f_i = match plotting_position {
600                PlottingPosition::Weibull => if64 / (nf + 1.0),
601                PlottingPosition::Gringorten => (if64 - 0.44) / (nf + 0.12),
602                PlottingPosition::Blom => (if64 - 0.375) / (nf + 0.25),
603                PlottingPosition::Hazen => (if64 - 0.5) / nf,
604            };
605            let f_i = f_i.clamp(1e-10, 1.0 - 1e-10);
606            1.0 / (1.0 - f_i)
607        })
608        .collect();
609
610    Ok((Array1::from(sorted), Array1::from(return_periods)))
611}
612
613// ---------------------------------------------------------------------------
614// Tests
615// ---------------------------------------------------------------------------
616
617#[cfg(test)]
618mod tests {
619    use super::*;
620    use scirs2_core::ndarray::{array, Array1};
621
622    fn approx_eq(a: f64, b: f64, tol: f64) -> bool {
623        (a - b).abs() < tol
624    }
625
626    fn relative_eq(a: f64, b: f64, rtol: f64) -> bool {
627        let denom = b.abs().max(1e-12);
628        (a - b).abs() / denom < rtol
629    }
630
631    // Helper: generate synthetic data from a known Gumbel(μ, β) distribution
632    fn gumbel_sample(mu: f64, beta: f64, n: usize, seed: u64) -> Array1<f64> {
633        use super::super::distributions::Gumbel;
634        let g = Gumbel::new(mu, beta).unwrap();
635        Array1::from(g.sample(n, seed))
636    }
637
638    // ---- Block Maxima -------------------------------------------------------
639
640    #[test]
641    fn test_block_maxima_basic() {
642        let data = gumbel_sample(10.0, 2.0, 500, 1);
643        let result = block_maxima_analysis(
644            data.view(),
645            50,
646            &[10.0, 100.0],
647            EvtEstimationMethod::LMoments,
648        )
649        .unwrap();
650        assert_eq!(result.n_blocks, 10);
651        assert_eq!(result.block_maxima.len(), 10);
652        assert_eq!(result.return_levels.len(), 2);
653        assert!(result.gev_params.sigma > 0.0);
654    }
655
656    #[test]
657    fn test_block_maxima_return_levels_increasing() {
658        let data = gumbel_sample(5.0, 1.5, 1000, 2);
659        let result = block_maxima_analysis(
660            data.view(),
661            100,
662            &[10.0, 50.0, 100.0, 1000.0],
663            EvtEstimationMethod::LMoments,
664        )
665        .unwrap();
666        let levels: Vec<f64> = result.return_levels.iter().map(|&(_, l)| l).collect();
667        // Higher return periods must give higher (or equal) return levels
668        for w in levels.windows(2) {
669            assert!(
670                w[1] >= w[0] - 1e-6,
671                "levels not non-decreasing: {:?}",
672                levels
673            );
674        }
675    }
676
677    #[test]
678    fn test_block_maxima_mle() {
679        let data = gumbel_sample(0.0, 1.0, 400, 10);
680        let result =
681            block_maxima_analysis(data.view(), 40, &[10.0, 100.0], EvtEstimationMethod::MLE);
682        assert!(
683            result.is_ok(),
684            "MLE block maxima failed: {:?}",
685            result.err()
686        );
687    }
688
689    #[test]
690    fn test_block_maxima_pwm() {
691        let data = gumbel_sample(0.0, 1.0, 400, 11);
692        let result = block_maxima_analysis(data.view(), 40, &[10.0], EvtEstimationMethod::PWM);
693        assert!(result.is_ok());
694    }
695
696    #[test]
697    fn test_block_maxima_zero_block_size_error() {
698        let data = gumbel_sample(0.0, 1.0, 100, 3);
699        assert!(
700            block_maxima_analysis(data.view(), 0, &[10.0], EvtEstimationMethod::LMoments).is_err()
701        );
702    }
703
704    #[test]
705    fn test_block_maxima_too_few_blocks_error() {
706        // Only 1 block → error
707        let data = gumbel_sample(0.0, 1.0, 50, 4);
708        assert!(
709            block_maxima_analysis(data.view(), 60, &[10.0], EvtEstimationMethod::LMoments).is_err()
710        );
711    }
712
713    #[test]
714    fn test_block_maxima_invalid_return_period_error() {
715        let data = gumbel_sample(0.0, 1.0, 200, 5);
716        assert!(
717            block_maxima_analysis(data.view(), 20, &[0.5], EvtEstimationMethod::LMoments).is_err()
718        );
719    }
720
721    // ---- POT ---------------------------------------------------------------
722
723    #[test]
724    fn test_pot_basic() {
725        let data = gumbel_sample(5.0, 2.0, 500, 20);
726        let threshold = 7.0;
727        let result = peaks_over_threshold(data.view(), threshold, &[10.0, 100.0]).unwrap();
728        assert_eq!(result.threshold, threshold);
729        assert!(result.n_exceedances > 0);
730        assert!(result.rate > 0.0 && result.rate < 1.0);
731        assert!(result.gpd_params.sigma > 0.0);
732    }
733
734    #[test]
735    fn test_pot_return_levels_increasing() {
736        let data = gumbel_sample(0.0, 1.0, 1000, 21);
737        let result = peaks_over_threshold(data.view(), 1.0, &[5.0, 10.0, 50.0, 100.0]).unwrap();
738        let levels: Vec<f64> = result.return_levels.iter().map(|&(_, l)| l).collect();
739        for w in levels.windows(2) {
740            assert!(w[1] >= w[0] - 1e-6, "{:?}", levels);
741        }
742    }
743
744    #[test]
745    fn test_pot_insufficient_exceedances_error() {
746        let data = array![1.0, 2.0, 3.0, 4.0, 100.0];
747        // Only 1 value exceeds 50 → error
748        assert!(peaks_over_threshold(data.view(), 50.0, &[10.0]).is_err());
749    }
750
751    #[test]
752    fn test_pot_invalid_return_period_error() {
753        let data = gumbel_sample(0.0, 1.0, 200, 22);
754        assert!(peaks_over_threshold(data.view(), 0.0, &[0.5]).is_err());
755    }
756
757    // ---- Mean Excess -------------------------------------------------------
758
759    #[test]
760    fn test_mean_excess_plot_basic() {
761        let data = gumbel_sample(0.0, 1.0, 200, 30);
762        let (thresholds, me) = mean_excess_plot(data.view(), 20).unwrap();
763        assert!(!thresholds.is_empty());
764        assert_eq!(thresholds.len(), me.len());
765        assert!(me.iter().all(|&v| v >= 0.0));
766    }
767
768    #[test]
769    fn test_mean_excess_plot_insufficient_data_error() {
770        let data = array![1.0, 2.0, 3.0];
771        assert!(mean_excess_plot(data.view(), 10).is_err());
772    }
773
774    #[test]
775    fn test_mean_excess_exponential_linear() {
776        // For an exponential distribution, the mean excess function is constant (linear with slope 0)
777        use super::super::distributions::GeneralizedPareto;
778        let gpd = GeneralizedPareto::new(0.0, 2.0, 0.0).unwrap(); // exponential
779        let samples = gpd.sample(500, 42);
780        let arr = Array1::from(samples);
781        let (_, me) = mean_excess_plot(arr.view(), 15).unwrap();
782        // Mean excess should be approximately constant ≈ 2.0 near lower thresholds
783        let first = me[0];
784        let last = me[me.len() - 1];
785        // Allow reasonable variance; just check it's in the right ballpark
786        assert!(first > 0.0 && last > 0.0);
787    }
788
789    // ---- Hill estimator ----------------------------------------------------
790
791    #[test]
792    fn test_hill_basic_heavy_tail() {
793        // Pareto data: CDF = 1 - x^{-alpha} for x >= 1; tail index xi = 1/alpha
794        let alpha = 2.0_f64;
795        // Generate pseudo-Pareto data using quantile inversion: x = (1-u)^{-1/alpha}
796        let mut data: Vec<f64> = (1..=500)
797            .map(|i| {
798                let u = i as f64 / 501.0;
799                (1.0 - u).powf(-1.0 / alpha)
800            })
801            .collect();
802        data.sort_by(|a, b| a.partial_cmp(b).unwrap());
803        let arr = Array1::from(data);
804        let xi_hat = hill_estimator(arr.view(), 50).unwrap();
805        // Expected: xi ≈ 0.5 (= 1/alpha)
806        assert!(relative_eq(xi_hat, 1.0 / alpha, 0.25), "xi_hat={xi_hat}");
807    }
808
809    #[test]
810    fn test_hill_invalid_k_error() {
811        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
812        assert!(hill_estimator(data.view(), 0).is_err());
813        assert!(hill_estimator(data.view(), 5).is_err()); // k >= n
814    }
815
816    #[test]
817    fn test_hill_insufficient_data_error() {
818        let data = array![1.0];
819        assert!(hill_estimator(data.view(), 1).is_err());
820    }
821
822    // ---- Pickands estimator -----------------------------------------------
823
824    #[test]
825    fn test_pickands_basic() {
826        // Pareto(alpha=2): tail index xi = 0.5
827        let alpha = 2.0_f64;
828        let data: Vec<f64> = (1..=200)
829            .map(|i| {
830                let u = i as f64 / 201.0;
831                (1.0 - u).powf(-1.0 / alpha)
832            })
833            .collect();
834        let arr = Array1::from(data);
835        let k = 10;
836        let xi_hat = pickands_estimator(arr.view(), k).unwrap();
837        // Pickands is noisier; allow wider tolerance
838        assert!(xi_hat.is_finite(), "xi_hat={xi_hat}");
839    }
840
841    #[test]
842    fn test_pickands_invalid_k_error() {
843        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
844        assert!(pickands_estimator(data.view(), 0).is_err());
845        assert!(pickands_estimator(data.view(), 3).is_err()); // 4*3 = 12 >= 10
846    }
847
848    // ---- Return level confidence interval ---------------------------------
849
850    #[test]
851    fn test_return_level_ci_basic() {
852        let gev = GeneralizedExtremeValue::new(0.0, 1.0, 0.1).unwrap();
853        let (lo, est, hi) = return_level_confidence(&gev, 100.0, 50, 0.05).unwrap();
854        assert!(lo < est, "lo={lo} should be < est={est}");
855        assert!(est < hi, "est={est} should be < hi={hi}");
856    }
857
858    #[test]
859    fn test_return_level_ci_wider_for_small_n() {
860        let gev = GeneralizedExtremeValue::new(0.0, 1.0, 0.0).unwrap();
861        let (lo50, _, hi50) = return_level_confidence(&gev, 100.0, 50, 0.05).unwrap();
862        let (lo500, _, hi500) = return_level_confidence(&gev, 100.0, 500, 0.05).unwrap();
863        let width_50 = hi50 - lo50;
864        let width_500 = hi500 - lo500;
865        assert!(width_50 > width_500, "Smaller n should give wider CI");
866    }
867
868    #[test]
869    fn test_return_level_ci_invalid_inputs() {
870        let gev = GeneralizedExtremeValue::new(0.0, 1.0, 0.0).unwrap();
871        assert!(return_level_confidence(&gev, 0.5, 100, 0.05).is_err());
872        assert!(return_level_confidence(&gev, 100.0, 100, 0.0).is_err());
873        assert!(return_level_confidence(&gev, 100.0, 100, 0.6).is_err());
874    }
875
876    // ---- Empirical return periods ------------------------------------------
877
878    #[test]
879    fn test_empirical_return_periods_weibull() {
880        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
881        let (sorted, rp) =
882            empirical_return_periods(data.view(), PlottingPosition::Weibull).unwrap();
883        assert_eq!(sorted.len(), 5);
884        assert_eq!(rp.len(), 5);
885        // Smallest data point has smallest return period, largest has largest
886        for w in rp.iter().collect::<Vec<_>>().windows(2) {
887            assert!(w[1] > w[0]);
888        }
889    }
890
891    #[test]
892    fn test_empirical_return_periods_all_methods() {
893        let data = gumbel_sample(0.0, 1.0, 100, 50);
894        for method in [
895            PlottingPosition::Weibull,
896            PlottingPosition::Gringorten,
897            PlottingPosition::Blom,
898            PlottingPosition::Hazen,
899        ] {
900            let (s, rp) = empirical_return_periods(data.view(), method).unwrap();
901            assert_eq!(s.len(), 100);
902            assert_eq!(rp.len(), 100);
903            assert!(rp.iter().all(|&r| r >= 1.0));
904        }
905    }
906
907    #[test]
908    fn test_empirical_return_periods_empty_error() {
909        let data: Array1<f64> = Array1::zeros(0);
910        assert!(empirical_return_periods(data.view(), PlottingPosition::Weibull).is_err());
911    }
912
913    #[test]
914    fn test_normal_quantile_symmetry() {
915        let z_95 = normal_quantile(0.975);
916        let z_05 = normal_quantile(0.025);
917        assert!(approx_eq(z_95, -z_05, 1e-6));
918        // z_{0.975} ≈ 1.96
919        assert!(approx_eq(z_95, 1.96, 0.01));
920    }
921}
scirs2_stats/extreme_value/analysis.rs

scirs2_stats/extreme_value/
analysis.rs