#[derive(Debug, Clone)]
pub struct PsisResult {
pub smoothed: Vec<f64>,
pub k_hat: f64,
pub tail_start: usize,
pub tail_count: usize,
}
pub(crate) const MIN_TAIL_COUNT: usize = 5;
const MAX_TAIL_FRACTION: f64 = 0.2;
pub fn pareto_smooth_weights(weights: &[f64]) -> Option<PsisResult> {
if weights.len() < MIN_TAIL_COUNT * 2 || weights.iter().any(|w| !w.is_finite() || *w < 0.0) {
return None;
}
let mut indexed: Vec<(usize, f64)> = weights.iter().copied().enumerate().collect();
indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
let n = indexed.len();
let tail_count = ((n as f64).sqrt().ceil() as usize)
.max(MIN_TAIL_COUNT)
.min(((MAX_TAIL_FRACTION * n as f64).ceil() as usize).max(MIN_TAIL_COUNT))
.min(n - 1);
let tail_start = n - tail_count;
let threshold = indexed[tail_start - 1].1;
let excesses: Vec<f64> = indexed[tail_start..]
.iter()
.map(|(_, w)| (w - threshold).max(0.0))
.collect();
let (k_hat, sigma_hat) = fit_gpd_moments(&excesses)?;
let mut smoothed = weights.to_vec();
let mut previous = threshold;
for (rank, (original_idx, _)) in indexed[tail_start..].iter().enumerate() {
let p = (rank as f64 + 0.5) / tail_count as f64;
let q = threshold + gpd_quantile(p, k_hat, sigma_hat).max(0.0);
let monotone = q.max(previous);
smoothed[*original_idx] = monotone;
previous = monotone;
}
Some(PsisResult {
smoothed,
k_hat,
tail_start,
tail_count,
})
}
pub fn fit_gpd_moments(excesses: &[f64]) -> Option<(f64, f64)> {
let mut x: Vec<f64> = excesses
.iter()
.copied()
.filter(|v| v.is_finite() && *v > 0.0)
.collect();
if x.len() < MIN_TAIL_COUNT {
return None;
}
x.sort_by(f64::total_cmp);
let n = x.len();
let nf = n as f64;
let x_max = x[n - 1];
let q_idx = ((nf / 4.0 + 0.5).floor() as usize)
.saturating_sub(1)
.min(n - 1);
let x_star = x[q_idx];
if !(x_max.is_finite() && x_max > 0.0 && x_star.is_finite() && x_star > 0.0) {
return None;
}
const PRIOR_BS: f64 = 3.0;
const PRIOR_K: f64 = 10.0;
let m_est = 30 + (nf.sqrt() as usize);
let mut b_grid = Vec::with_capacity(m_est);
let mut len_scale = Vec::with_capacity(m_est);
let mut max_ls = f64::NEG_INFINITY;
for j in 1..=m_est {
let b =
(1.0 - (m_est as f64 / (j as f64 - 0.5)).sqrt()) / (PRIOR_BS * x_star) + 1.0 / x_max;
let k = profile_shape(b, &x);
let arg = -(b / k);
let ls = if arg.is_finite() && arg > 0.0 && k.is_finite() {
nf * (arg.ln() - k - 1.0)
} else {
f64::NEG_INFINITY
};
if ls > max_ls {
max_ls = ls;
}
b_grid.push(b);
len_scale.push(ls);
}
if !max_ls.is_finite() {
return None;
}
let mut weight_sum = 0.0;
let mut b_post = 0.0;
for (&b, &ls) in b_grid.iter().zip(len_scale.iter()) {
let w = if ls.is_finite() {
(ls - max_ls).exp()
} else {
0.0
};
weight_sum += w;
b_post += w * b;
}
if !(weight_sum.is_finite() && weight_sum > 0.0) {
return None;
}
b_post /= weight_sum;
if !b_post.is_finite() || b_post == 0.0 {
return None;
}
let k_raw = profile_shape(b_post, &x);
let k = (nf * k_raw + PRIOR_K * 0.5) / (nf + PRIOR_K);
let sigma = -k / b_post;
if !(k.is_finite() && sigma.is_finite() && sigma > 0.0) {
return None;
}
Some((k, sigma))
}
#[inline]
fn profile_shape(b: f64, x: &[f64]) -> f64 {
let acc: f64 = x.iter().map(|&xi| (-b * xi).ln_1p()).sum();
acc / x.len() as f64
}
#[inline]
fn gpd_quantile(p: f64, k: f64, sigma: f64) -> f64 {
let survival = (1.0 - p).clamp(1e-12, 1.0);
if k.abs() < 1e-8 {
-sigma * survival.ln()
} else {
sigma * (survival.powf(-k) - 1.0) / k
}
}
#[cfg(test)]
mod tests {
use super::*;
fn gpd_sample(u: f64, k: f64, sigma: f64) -> f64 {
if k.abs() < 1e-12 {
-sigma * (1.0 - u).ln()
} else {
sigma * ((1.0 - u).powf(-k) - 1.0) / k
}
}
#[test]
fn psis_k_hat_recovers_known_generalized_pareto_tail() {
let k_true = 0.35_f64;
let sigma = 1.7_f64;
let mut xs = Vec::new();
for i in 1..=10_000 {
let u = (i as f64 - 0.5) / 10_000.0;
xs.push(gpd_sample(u, k_true, sigma));
}
let (k_hat, sigma_hat) = fit_gpd_moments(&xs).expect("GPD fit should succeed");
assert!(
(k_hat - k_true).abs() < 0.03,
"k_hat={k_hat}, k_true={k_true}"
);
assert!(
(sigma_hat - sigma).abs() < 0.08,
"sigma_hat={sigma_hat}, sigma={sigma}"
);
}
#[test]
fn pareto_smoothing_preserves_nontail_and_reports_heavy_tail() {
let mut w = vec![1.0; 200];
for i in 1..=120 {
let u = (i as f64 - 0.5) / 120.0;
w.push(1.0 + gpd_sample(u, 0.7, 0.5));
}
let out = pareto_smooth_weights(&w).expect("PSIS should fit a positive tail");
assert_eq!(out.smoothed[0], 1.0);
assert!(
out.k_hat > 0.5,
"genuine GPD(k=0.7) tail should be flagged as heavy (infinite variance); got k_hat={}",
out.k_hat
);
}
#[test]
fn psis_k_hat_tracks_and_orders_heavy_tails() {
let recover = |k_true: f64| -> f64 {
let xs: Vec<f64> = (1..=100_000)
.map(|i| gpd_sample((i as f64 - 0.5) / 100_000.0, k_true, 1.0))
.collect();
fit_gpd_moments(&xs).expect("GPD fit should succeed").0
};
let mut last = f64::NEG_INFINITY;
for &k_true in &[0.5_f64, 0.7, 0.85, 1.0, 1.2] {
let k_hat = recover(k_true);
assert!(
(k_hat - k_true).abs() < 0.05,
"k_true={k_true}: fitted k_hat={k_hat} not within 0.05"
);
assert!(
k_hat > last,
"k_hat must increase with the true shape: {k_hat} !> {last}"
);
last = k_hat;
}
}
#[test]
fn psis_gpd_fit_handles_degenerate_inputs() {
assert!(
fit_gpd_moments(&[1.0, 2.0, 3.0]).is_none(),
"fewer than MIN_TAIL_COUNT"
);
assert!(
fit_gpd_moments(&[0.0, -1.0, f64::NAN, 0.0]).is_none(),
"no positive finite excesses"
);
if let Some((k_hat, sigma_hat)) = fit_gpd_moments(&[2.0; 50]) {
assert!(k_hat.is_finite() && sigma_hat.is_finite() && sigma_hat > 0.0);
assert!(
k_hat < 0.5,
"degenerate equal excesses must not be flagged heavy; got k_hat={k_hat}"
);
}
}
}