1#[derive(Debug, Clone)]
21pub struct PsisResult {
22 pub smoothed: Vec<f64>,
23 pub k_hat: f64,
24 pub tail_start: usize,
25 pub tail_count: usize,
26}
27
28pub const MIN_TAIL_COUNT: usize = 5;
29const MAX_TAIL_FRACTION: f64 = 0.2;
30
31pub fn pareto_smooth_weights(weights: &[f64]) -> Option<PsisResult> {
36 if weights.len() < MIN_TAIL_COUNT * 2 || weights.iter().any(|w| !w.is_finite() || *w < 0.0) {
37 return None;
38 }
39 let mut indexed: Vec<(usize, f64)> = weights.iter().copied().enumerate().collect();
40 indexed.sort_by(|a, b| a.1.total_cmp(&b.1));
41 let n = indexed.len();
42 let tail_count = ((n as f64).sqrt().ceil() as usize)
43 .max(MIN_TAIL_COUNT)
44 .min(((MAX_TAIL_FRACTION * n as f64).ceil() as usize).max(MIN_TAIL_COUNT))
45 .min(n - 1);
46 let tail_start = n - tail_count;
47 let threshold = indexed[tail_start - 1].1;
48 let excesses: Vec<f64> = indexed[tail_start..]
49 .iter()
50 .map(|(_, w)| (w - threshold).max(0.0))
51 .collect();
52 let (k_hat, sigma_hat) = fit_gpd_moments(&excesses)?;
53 let mut smoothed = weights.to_vec();
54 let mut previous = threshold;
55 for (rank, (original_idx, _)) in indexed[tail_start..].iter().enumerate() {
56 let p = (rank as f64 + 0.5) / tail_count as f64;
57 let q = threshold + gpd_quantile(p, k_hat, sigma_hat).max(0.0);
58 let monotone = q.max(previous);
59 smoothed[*original_idx] = monotone;
60 previous = monotone;
61 }
62 Some(PsisResult {
63 smoothed,
64 k_hat,
65 tail_start,
66 tail_count,
67 })
68}
69
70pub fn fit_gpd_moments(excesses: &[f64]) -> Option<(f64, f64)> {
94 let mut x: Vec<f64> = excesses
95 .iter()
96 .copied()
97 .filter(|v| v.is_finite() && *v > 0.0)
98 .collect();
99 if x.len() < MIN_TAIL_COUNT {
100 return None;
101 }
102 x.sort_by(f64::total_cmp);
103 let n = x.len();
104 let nf = n as f64;
105
106 let x_max = x[n - 1];
109 let q_idx = ((nf / 4.0 + 0.5).floor() as usize)
110 .saturating_sub(1)
111 .min(n - 1);
112 let x_star = x[q_idx];
113 if !(x_max.is_finite() && x_max > 0.0 && x_star.is_finite() && x_star > 0.0) {
114 return None;
115 }
116
117 const PRIOR_BS: f64 = 3.0;
120 const PRIOR_K: f64 = 10.0;
121 let m_est = 30 + (nf.sqrt() as usize);
122
123 let mut b_grid = Vec::with_capacity(m_est);
126 let mut len_scale = Vec::with_capacity(m_est);
127 let mut max_ls = f64::NEG_INFINITY;
128 for j in 1..=m_est {
129 let b =
130 (1.0 - (m_est as f64 / (j as f64 - 0.5)).sqrt()) / (PRIOR_BS * x_star) + 1.0 / x_max;
131 let k = profile_shape(b, &x);
132 let arg = k.map(|k| -(b / k));
133 let ls = if let (Some(k), Some(arg)) = (k, arg) {
134 if arg.is_finite() && arg > 0.0 {
135 nf * (arg.ln() - k - 1.0)
136 } else {
137 f64::NEG_INFINITY
138 }
139 } else {
140 f64::NEG_INFINITY
141 };
142 if ls > max_ls {
143 max_ls = ls;
144 }
145 b_grid.push(b);
146 len_scale.push(ls);
147 }
148 if !max_ls.is_finite() {
149 return None;
150 }
151
152 let mut weight_sum = 0.0;
154 let mut b_post = 0.0;
155 for (&b, &ls) in b_grid.iter().zip(len_scale.iter()) {
156 let w = if ls.is_finite() {
157 (ls - max_ls).exp()
158 } else {
159 0.0
160 };
161 weight_sum += w;
162 b_post += w * b;
163 }
164 if !(weight_sum.is_finite() && weight_sum > 0.0) {
165 return None;
166 }
167 b_post /= weight_sum;
168 if !b_post.is_finite() || b_post == 0.0 {
169 return None;
170 }
171
172 let k_raw = profile_shape(b_post, &x)?;
173 let k = (nf * k_raw + PRIOR_K * 0.5) / (nf + PRIOR_K);
175 let sigma = -k / b_post;
176 if !(k.is_finite() && sigma.is_finite() && sigma > 0.0) {
177 return None;
178 }
179 Some((k, sigma))
180}
181
182#[inline]
188fn profile_shape(b: f64, x: &[f64]) -> Option<f64> {
189 if !b.is_finite() || x.is_empty() {
190 return None;
191 }
192 let mut acc = 0.0_f64;
193 for &xi in x {
194 let arg = 1.0 - b * xi;
195 if !(arg.is_finite() && arg > 0.0) {
196 return None;
197 }
198 acc += arg.ln();
199 }
200 Some(acc / x.len() as f64)
201}
202
203#[inline]
204fn gpd_quantile(p: f64, k: f64, sigma: f64) -> f64 {
205 let survival = (1.0 - p).clamp(1e-12, 1.0);
206 if k.abs() < 1e-8 {
207 -sigma * survival.ln()
208 } else {
209 sigma * (survival.powf(-k) - 1.0) / k
210 }
211}
212
213#[cfg(test)]
214mod tests {
215 use super::*;
216
217 fn gpd_sample(u: f64, k: f64, sigma: f64) -> f64 {
218 if k.abs() < 1e-12 {
219 -sigma * (1.0 - u).ln()
220 } else {
221 sigma * ((1.0 - u).powf(-k) - 1.0) / k
222 }
223 }
224
225 #[test]
226 fn psis_k_hat_recovers_known_generalized_pareto_tail() {
227 let k_true = 0.35_f64;
228 let sigma = 1.7_f64;
229 let mut xs = Vec::new();
230 for i in 1..=10_000 {
231 let u = (i as f64 - 0.5) / 10_000.0;
232 xs.push(gpd_sample(u, k_true, sigma));
233 }
234 let (k_hat, sigma_hat) = fit_gpd_moments(&xs).expect("GPD fit should succeed");
235 assert!(
236 (k_hat - k_true).abs() < 0.03,
237 "k_hat={k_hat}, k_true={k_true}"
238 );
239 assert!(
240 (sigma_hat - sigma).abs() < 0.08,
241 "sigma_hat={sigma_hat}, sigma={sigma}"
242 );
243 }
244
245 #[test]
246 fn pareto_smoothing_preserves_nontail_and_reports_heavy_tail() {
247 let mut w = vec![1.0; 200];
251 for i in 1..=120 {
252 let u = (i as f64 - 0.5) / 120.0;
253 w.push(1.0 + gpd_sample(u, 0.7, 0.5));
254 }
255 let out = pareto_smooth_weights(&w).expect("PSIS should fit a positive tail");
256 assert_eq!(out.smoothed[0], 1.0);
257 assert!(
258 out.k_hat > 0.5,
259 "genuine GPD(k=0.7) tail should be flagged as heavy (infinite variance); got k_hat={}",
260 out.k_hat
261 );
262 }
263
264 #[test]
271 fn psis_k_hat_tracks_and_orders_heavy_tails() {
272 let recover = |k_true: f64| -> f64 {
273 let xs: Vec<f64> = (1..=100_000)
274 .map(|i| gpd_sample((i as f64 - 0.5) / 100_000.0, k_true, 1.0))
275 .collect();
276 fit_gpd_moments(&xs).expect("GPD fit should succeed").0
277 };
278 let mut last = f64::NEG_INFINITY;
279 for &k_true in &[0.5_f64, 0.7, 0.85, 1.0, 1.2] {
280 let k_hat = recover(k_true);
281 assert!(
282 (k_hat - k_true).abs() < 0.05,
283 "k_true={k_true}: fitted k_hat={k_hat} not within 0.05"
284 );
285 assert!(
286 k_hat > last,
287 "k_hat must increase with the true shape: {k_hat} !> {last}"
288 );
289 last = k_hat;
290 }
291 }
292
293 #[test]
296 fn psis_gpd_fit_handles_degenerate_inputs() {
297 assert!(
298 fit_gpd_moments(&[1.0, 2.0, 3.0]).is_none(),
299 "fewer than MIN_TAIL_COUNT"
300 );
301 assert!(
302 fit_gpd_moments(&[0.0, -1.0, f64::NAN, 0.0]).is_none(),
303 "no positive finite excesses"
304 );
305 if let Some((k_hat, sigma_hat)) = fit_gpd_moments(&[2.0; 50]) {
310 assert!(k_hat.is_finite() && sigma_hat.is_finite() && sigma_hat > 0.0);
311 assert!(
312 k_hat < 0.5,
313 "degenerate equal excesses must not be flagged heavy; got k_hat={k_hat}"
314 );
315 }
316 }
317
318 #[test]
319 fn psis_profile_shape_rejects_inadmissible_candidates() {
320 let x = [0.25, 1.0, 2.0];
321 assert!(
322 profile_shape(0.49, &x).is_some(),
323 "b below 1/x_max is admissible for all excesses"
324 );
325 assert!(
326 profile_shape(0.5, &x).is_none(),
327 "b at 1/x_max puts the largest excess on the log boundary"
328 );
329 assert!(
330 profile_shape(0.75, &x).is_none(),
331 "b above 1/x_max makes at least one log argument negative"
332 );
333 }
334}