Skip to main content

sharpebench_stats/
selection.rs

1//! Selection-axis luck control.
2//!
3//! When an agent searches over many candidate strategies and submits the best
4//! one, that best is upward-biased by selection. Reporting the **median**
5//! candidate's deflated Sharpe alongside the best exposes agents that only win
6//! by cherry-picking — the *selection* axis that pass^k (the reliability axis)
7//! and the Deflated Sharpe (the deflation axis) do not directly cover.
8//!
9//! After ALE-Bench's median-of-candidates selection: a robust agent has a
10//! family of edges (small `selection_gap`); a lucky one has a single spike.
11
12use crate::deflated_sharpe::deflated_sharpe_ratio;
13
/// Summary of how the deflated Sharpe ratios of a set of candidate return
/// streams are distributed: the top score, the median score, and the distance
/// between them.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SelectionRobustness {
    /// Number of candidates the summary was computed over.
    pub n_candidates: usize,
    /// Highest deflated Sharpe among the candidates — the figure an agent
    /// would put forward as its headline result.
    pub best_dsr: f64,
    /// Deflated Sharpe of the middle candidate once all candidates are ranked.
    pub median_dsr: f64,
    /// Difference `best_dsr - median_dsr`. When this is large, the headline
    /// owes more to a fortunate pick than to a family of comparably good
    /// candidates.
    pub selection_gap: f64,
}
26
/// Middle value of a slice the caller has already sorted in ascending order.
///
/// An odd-length slice yields its central element; an even-length slice yields
/// the mean of its two central elements. An empty slice yields `0.0`.
fn median_sorted(sorted: &[f64]) -> f64 {
    let mid = sorted.len() / 2;
    match sorted.len() {
        0 => 0.0,
        // Even length: no single middle element, so average the two around it.
        len if len % 2 == 0 => 0.5 * (sorted[mid - 1] + sorted[mid]),
        _ => sorted[mid],
    }
}
39
40/// Compute selection robustness over candidate return streams. Each slice is one
41/// candidate strategy's pooled returns; they are deflated with the same trial
42/// footprint and summarized. Empty input → all-zero.
43pub fn selection_robustness(
44    candidates: &[Vec<f64>],
45    n_trials: u32,
46    trials_sr_std: f64,
47) -> SelectionRobustness {
48    if candidates.is_empty() {
49        return SelectionRobustness {
50            n_candidates: 0,
51            best_dsr: 0.0,
52            median_dsr: 0.0,
53            selection_gap: 0.0,
54        };
55    }
56    let mut dsrs: Vec<f64> = candidates
57        .iter()
58        .map(|c| deflated_sharpe_ratio(c, n_trials, trials_sr_std))
59        .collect();
60    dsrs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
61    let best = *dsrs.last().unwrap_or(&0.0);
62    let median = median_sorted(&dsrs);
63    SelectionRobustness {
64        n_candidates: dsrs.len(),
65        best_dsr: best,
66        median_dsr: median,
67        selection_gap: best - median,
68    }
69}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// Reproducible synthetic returns: `n` steps of a flat `mean_ret` drift
    /// plus a sine term of amplitude `amp`. No randomness is involved.
    fn stream(mean_ret: f64, amp: f64, n: usize) -> Vec<f64> {
        let mut returns = Vec::with_capacity(n);
        for step in 0..n {
            let phase = step as f64 * 0.7;
            returns.push(mean_ret + amp * phase.sin());
        }
        returns
    }

    #[test]
    fn cherry_picked_winner_has_large_gap() {
        // A single strong stream followed by eight drift-free, noisier ones:
        // the best score should sit above the median.
        let winner = stream(0.004, 0.001, 80);
        let also_rans = std::iter::repeat_with(|| stream(0.0, 0.003, 80)).take(8);
        let candidates: Vec<Vec<f64>> = std::iter::once(winner).chain(also_rans).collect();

        let report = selection_robustness(&candidates, 50, 0.5);

        assert_eq!(report.n_candidates, 9);
        assert!(report.best_dsr >= report.median_dsr);
        assert!(
            report.selection_gap > 0.0,
            "a lone winner should leave a positive selection gap: {s:?}",
            s = report
        );
    }

    #[test]
    fn robust_family_has_small_gap() {
        // Nine equally skilled streams: best and median should nearly coincide.
        let candidates: Vec<Vec<f64>> = vec![stream(0.003, 0.0005, 80); 9];

        let report = selection_robustness(&candidates, 50, 0.5);

        assert!(
            report.selection_gap < 0.10,
            "a robust family should have a small gap: {s:?}",
            s = report
        );
    }

    #[test]
    fn empty_is_zero() {
        // No candidates at all must produce the zeroed summary.
        let no_candidates: [Vec<f64>; 0] = [];

        let report = selection_robustness(&no_candidates, 50, 0.5);

        assert_eq!(report.n_candidates, 0);
        assert_eq!(report.selection_gap, 0.0);
    }
}