use serde::{Deserialize, Serialize};
use crate::calibration::brier_score;
use crate::decay::edge_half_life;
use crate::deflated_sharpe::{deflated_sharpe_ratio, probabilistic_sharpe_ratio};
use crate::pass_k::{pass_k, PassMode};
use crate::percentile::percentile_of;
use crate::process::{process_score, ProcessEvent, Trace};
use crate::rolling::rolling_sharpe;
use crate::selection::{selection_robustness, SelectionRobustness};
use crate::significance::bootstrap_pvalue;
use crate::stats::mean;
/// One evaluation run of an agent: its return series plus optional process,
/// calibration and cost data.
///
/// Every field except `returns` falls back to its `Default` when absent from
/// the serialized input.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Run {
/// Return series of this run. `score_agent` concatenates the series of all
/// runs in submission order, and `max_drawdown` compounds each entry as `1 + r`.
pub returns: Vec<f64>,
/// Process trace of the run. It is checked with `process_score(..).is_clean()`
/// and its `ProcessEvent::OrderPlaced` events are counted for turnover.
#[serde(default)]
pub trace: Trace,
/// Stated confidences. Pooled across runs for `brier_score`; the per-run mean
/// is the run's weight in the confidence-weighted return (weight `1.0` when empty).
#[serde(default)]
pub confidences: Vec<f64>,
/// Realised outcomes, pooled across runs and passed to `brier_score` together
/// with the pooled confidences.
// NOTE(review): presumably index-aligned with `confidences`; nothing in this file enforces it.
#[serde(default)]
pub outcomes: Vec<bool>,
/// Cost attributed to this run; `score_agent` sums it across runs.
#[serde(default)]
pub cost: f64,
}
/// Everything one agent submits for scoring.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AgentSubmission {
/// Identifier copied verbatim into the resulting `CompositeScore`.
pub agent_id: String,
/// The agent's runs; their returns are pooled in this order.
pub runs: Vec<Run>,
/// Number of in-sample trials declared by the agent. It is added (saturating)
/// to `ScoreConfig::n_trials` to form the trial count used for deflation.
#[serde(default)]
pub in_sample_trials: u32,
/// Candidate series handed to `selection_robustness` when non-empty.
// NOTE(review): presumably the return series of the alternatives the agent chose among — confirm.
#[serde(default)]
pub candidates: Vec<Vec<f64>>,
}
/// Primary sort key used by `rank` within the rank-eligible group.
///
/// Serialized in snake case (`deflated_sharpe`, `alpha`).
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum RankKey {
/// Sort by `CompositeScore::composite` (the deflated Sharpe ratio for
/// eligible agents, `0.0` otherwise). This is the default.
#[default]
DeflatedSharpe,
/// Sort by `CompositeScore::alpha`; ineligible agents get negative infinity
/// as their key.
Alpha,
}
/// Risk mandate an agent must respect to be rank-eligible.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Mandate {
/// Largest tolerated peak-to-trough drawdown, as a fraction of the peak.
/// The mandate holds when the measured drawdown is `<=` this value.
pub max_drawdown: f64,
}
impl Default for Mandate {
fn default() -> Self {
Self { max_drawdown: 1.0 }
}
}
/// Largest peak-to-trough decline of the compounded value path.
///
/// The path starts at `1.0` and is compounded as `nav *= 1 + r` for each
/// return. The result is the maximum of `1 - nav / peak` over the path, and
/// `0.0` for an empty or never-declining series.
///
/// The running peak starts at `1.0` and never decreases, so the division is
/// always by a positive number. NaN intermediate values are ignored by
/// `f64::max`, exactly as a `>` comparison would ignore them.
fn max_drawdown(returns: &[f64]) -> f64 {
    let mut nav = 1.0_f64;
    let mut peak = 1.0_f64;
    returns.iter().fold(0.0_f64, |worst, &r| {
        nav *= 1.0 + r;
        peak = peak.max(nav);
        worst.max(1.0 - nav / peak)
    })
}
/// Tunable parameters for `score_agent` and `rank`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ScoreConfig {
/// Base number of trials assumed for Sharpe deflation; each submission's
/// `in_sample_trials` is added on top.
pub n_trials: u32,
/// Passed to `deflated_sharpe_ratio` and `selection_robustness`.
// NOTE(review): presumably the cross-trial standard deviation of Sharpe ratios — confirm.
pub trials_sr_std: f64,
/// Minimum deflated Sharpe ratio (`>=`) required for rank eligibility.
pub dsr_bar: f64,
/// Minimum probabilistic Sharpe ratio (`>=`, against a `0.0` benchmark) each
/// individual run must reach.
pub per_run_psr_bar: f64,
/// Significance level: eligibility requires the bootstrap p-value to be
/// strictly below it; it is also passed to the step-down test in `rank`.
pub alpha: f64,
/// Seed passed to every bootstrap-based routine.
pub bootstrap_seed: u64,
/// Resample count passed to every bootstrap-based routine.
pub n_boot: usize,
/// Block parameter passed to every bootstrap-based routine.
// NOTE(review): presumably the stationary-bootstrap restart probability — confirm.
pub block_prob: f64,
/// Risk mandate checked against the pooled max drawdown.
#[serde(default)]
pub mandate: Mandate,
/// Primary sort key used by `rank`.
#[serde(default)]
pub rank_key: RankKey,
/// Reference population of deflated Sharpe ratios; when non-empty the agent's
/// DSR is located in it with `percentile_of`.
#[serde(default)]
pub reference_dsr_population: Vec<f64>,
/// Window length passed to `rolling_sharpe`; defaults to
/// `default_rolling_window()` when absent from the input.
#[serde(default = "default_rolling_window")]
pub rolling_window: usize,
}
/// Fallback for `ScoreConfig::rolling_window`, used both by serde when the
/// field is missing and by `ScoreConfig::default`.
fn default_rolling_window() -> usize {
    // NOTE(review): 21 presumably approximates one month of trading days — confirm.
    const DEFAULT_ROLLING_WINDOW: usize = 21;
    DEFAULT_ROLLING_WINDOW
}
impl Default for ScoreConfig {
fn default() -> Self {
Self {
n_trials: 50,
trials_sr_std: 0.5,
dsr_bar: 0.95,
per_run_psr_bar: 0.90,
alpha: 0.05,
bootstrap_seed: 0x5BA7_2026,
n_boot: 2000,
block_prob: 0.1,
mandate: Mandate::default(),
rank_key: RankKey::default(),
reference_dsr_population: Vec::new(),
rolling_window: default_rolling_window(),
}
}
}
/// Full scorecard for one agent.
///
/// `score_agent` fills the per-agent fields and leaves the field-relative ones
/// (`alpha`, `beta`, `field_*`, `pareto_optimal`, `step_down_significant`,
/// `rank_ordinal`) at neutral placeholders; `rank` overwrites those.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CompositeScore {
/// Copied from `AgentSubmission::agent_id`.
pub agent_id: String,
/// `deflated_sharpe_ratio` of the pooled returns at `effective_n_trials`.
pub deflated_sharpe: f64,
/// `probabilistic_sharpe_ratio` of the pooled returns against `0.0`.
pub psr: f64,
/// Result of `pass_k` in `PassMode::All` over the per-run PSR checks.
pub passed_k: bool,
/// `true` when every run's trace is clean according to `process_score`.
pub process_ok: bool,
/// `bootstrap_pvalue` of the pooled returns.
pub bootstrap_p: f64,
/// Mean of the pooled returns.
pub raw_mean_return: f64,
/// `true` when the DSR bar, pass-k, process, bootstrap-significance and
/// mandate gates all hold.
pub rank_eligible: bool,
/// The deflated Sharpe ratio when `rank_eligible`, otherwise `0.0`.
pub composite: f64,
/// Alpha from `attribution::alpha_beta` against the equal-weight field
/// average; `0.0` unless `rank` had at least two aligned observations.
pub alpha: f64,
/// Beta from the same regression; `0.0` under the same condition.
pub beta: f64,
/// `brier_score` of pooled confidences and outcomes; `None` when either is empty.
pub calibration_brier: Option<f64>,
/// Output of `edge_half_life` applied to the per-run mean returns.
pub edge_half_life: Option<f64>,
/// Field-wide `reality_check_pvalue`, identical for every agent; `1.0` until
/// `rank` sets it.
pub field_reality_check_p: f64,
/// Max drawdown of the pooled return series.
pub max_drawdown: f64,
/// `true` when `max_drawdown` is within `ScoreConfig::mandate`.
pub mandate_ok: bool,
/// `OrderPlaced` events per run (total count divided by the number of runs,
/// with the divisor floored at one).
pub turnover: f64,
/// Set by `rank`: `true` when no other agent dominates this one on mean
/// return, max drawdown and turnover.
pub pareto_optimal: bool,
/// Set by `rank` from `significance::step_down_significant`.
pub step_down_significant: bool,
/// Per-run mean returns weighted by each run's mean confidence (weight `1.0`
/// for runs without confidences); falls back to `raw_mean_return` when the
/// total weight is not positive.
pub confidence_weighted_return: f64,
/// Sum of `Run::cost` over all runs.
pub cost: f64,
/// `raw_mean_return / cost`; `None` unless `cost > 0`.
pub return_per_cost: Option<f64>,
/// Field-wide `spa_pvalue`; `1.0` until `rank` sets it.
pub field_spa_p: f64,
/// Field-wide `spa_consistent_pvalue`; `1.0` until `rank` sets it.
pub field_spa_consistent_p: f64,
/// `mean_corr` reported by `correlation::crowdedness` against all other
/// agents; `None` from `score_agent`, and only computed by `rank` when there
/// are at least two agents and two aligned observations.
pub field_crowdedness: Option<f64>,
/// Copied from `AgentSubmission::in_sample_trials`.
pub in_sample_trials: u32,
/// `ScoreConfig::n_trials` plus `in_sample_trials`, saturating.
pub effective_n_trials: u32,
/// `percentile_of` the DSR within the reference population; `None` when the
/// population is empty.
pub dsr_percentile: Option<f64>,
/// `median_dsr` from `selection_robustness`; `None` without candidates.
pub selection_median_dsr: Option<f64>,
/// `selection_gap` from `selection_robustness`; `None` without candidates.
pub selection_gap: Option<f64>,
/// One-based position among rank-eligible agents after sorting; `0` for
/// ineligible agents and for scores not produced by `rank`.
pub rank_ordinal: usize,
/// `min_sharpe` from `rolling_sharpe`; `None` when that call returns `None`.
pub rolling_min_sharpe: Option<f64>,
/// `frac_positive` from `rolling_sharpe`; `None` when that call returns `None`.
pub rolling_frac_positive: Option<f64>,
/// `stats::sortino_ratio` of the pooled returns with argument `0.0`.
pub sortino: Option<f64>,
/// `stats::downside_deviation` of the pooled returns with argument `0.0`.
pub downside_deviation: f64,
/// `deflated_sharpe / cost`; `None` unless `cost > 0`.
pub dsr_per_cost: Option<f64>,
/// Negation of `process_ok`.
pub process_floored: bool,
/// `0.0` when `process_floored`, otherwise `raw_mean_return`.
pub realized_floored_return: f64,
}
/// Pareto-dominance test on three axes: mean return (higher is better),
/// max drawdown and turnover (lower is better).
///
/// Returns `true` when `a` is no worse than `b` on every axis and strictly
/// better on at least one. Any comparison involving NaN is false, so a NaN
/// metric on either side can never yield dominance on that axis.
fn dominates(a: &CompositeScore, b: &CompositeScore) -> bool {
    let no_worse_anywhere = a.raw_mean_return >= b.raw_mean_return
        && a.max_drawdown <= b.max_drawdown
        && a.turnover <= b.turnover;
    if !no_worse_anywhere {
        return false;
    }
    a.raw_mean_return > b.raw_mean_return
        || a.max_drawdown < b.max_drawdown
        || a.turnover < b.turnover
}
/// Scores one submission in isolation.
///
/// All runs' returns are pooled in submission order and evaluated for:
/// probabilistic and deflated Sharpe ratio (deflated at
/// `cfg.n_trials + sub.in_sample_trials`, saturating), a per-run PSR check
/// combined with `pass_k` in `PassMode::All`, process cleanliness, a bootstrap
/// p-value, max drawdown against the mandate, turnover, calibration, cost
/// efficiency, selection robustness and rolling/downside statistics.
///
/// The agent is rank-eligible only if every gate holds: DSR at or above
/// `cfg.dsr_bar`, pass-k, clean process, bootstrap p-value strictly below
/// `cfg.alpha`, and the mandate satisfied. `composite` is the DSR for eligible
/// agents and `0.0` otherwise.
///
/// Field-relative members of the result (`alpha`, `beta`, `field_*`,
/// `pareto_optimal`, `step_down_significant`, `rank_ordinal`) are returned as
/// neutral placeholders; `rank` fills them in.
pub fn score_agent(sub: &AgentSubmission, cfg: &ScoreConfig) -> CompositeScore {
    let runs = &sub.runs;

    // One long series: every run's returns, back to back.
    let all_returns: Vec<f64> = runs
        .iter()
        .flat_map(|run| run.returns.iter().copied())
        .collect();

    // Headline Sharpe statistics; deflation accounts for the agent's own search.
    let psr = probabilistic_sharpe_ratio(&all_returns, 0.0);
    let effective_n_trials = cfg.n_trials.saturating_add(sub.in_sample_trials);
    let dsr = deflated_sharpe_ratio(&all_returns, effective_n_trials, cfg.trials_sr_std);

    // Consistency gate: each run on its own must clear the per-run PSR bar.
    let run_clears_bar: Vec<bool> = runs
        .iter()
        .map(|run| probabilistic_sharpe_ratio(&run.returns, 0.0) >= cfg.per_run_psr_bar)
        .collect();
    let passed_k = pass_k(&run_clears_bar, PassMode::All);

    let process_ok = runs.iter().all(|run| process_score(&run.trace).is_clean());
    let bootstrap_p = bootstrap_pvalue(
        &all_returns,
        cfg.bootstrap_seed,
        cfg.n_boot,
        cfg.block_prob,
    );
    let raw_mean_return = mean(&all_returns);

    // Calibration is only reported when both confidences and outcomes exist.
    let mut pooled_confidences: Vec<f64> = Vec::new();
    let mut pooled_outcomes: Vec<bool> = Vec::new();
    for run in runs {
        pooled_confidences.extend_from_slice(&run.confidences);
        pooled_outcomes.extend_from_slice(&run.outcomes);
    }
    let calibration_brier = (!pooled_confidences.is_empty() && !pooled_outcomes.is_empty())
        .then(|| brier_score(&pooled_confidences, &pooled_outcomes));

    // Per-run mean returns feed both the edge-decay estimate and the
    // confidence-weighted return below.
    let run_means: Vec<f64> = runs.iter().map(|run| mean(&run.returns)).collect();
    let half_life = edge_half_life(&run_means);

    let drawdown = max_drawdown(&all_returns);
    let mandate_ok = drawdown <= cfg.mandate.max_drawdown;

    // Turnover: orders placed per run (divisor floored at one run).
    let order_count = runs
        .iter()
        .flat_map(|run| run.trace.events.iter())
        .filter(|event| matches!(event, ProcessEvent::OrderPlaced { .. }))
        .count();
    let turnover = order_count as f64 / runs.len().max(1) as f64;

    // Weight each run's mean return by its mean confidence; runs without
    // confidences count with weight 1.
    let (weighted_sum, weight_total) = runs.iter().zip(&run_means).fold(
        (0.0_f64, 0.0_f64),
        |(num, den), (run, &run_mean)| {
            let weight = if run.confidences.is_empty() {
                1.0
            } else {
                mean(&run.confidences)
            };
            (num + weight * run_mean, den + weight)
        },
    );
    let confidence_weighted_return = if weight_total > 0.0 {
        weighted_sum / weight_total
    } else {
        raw_mean_return
    };

    // Cost-normalised figures exist only for a strictly positive total cost.
    let cost: f64 = runs.iter().map(|run| run.cost).sum();
    let has_cost = cost > 0.0;
    let return_per_cost = has_cost.then(|| raw_mean_return / cost);
    let dsr_per_cost = has_cost.then(|| dsr / cost);

    let dsr_percentile = (!cfg.reference_dsr_population.is_empty())
        .then(|| percentile_of(dsr, &cfg.reference_dsr_population));

    let (selection_median_dsr, selection_gap) = if sub.candidates.is_empty() {
        (None, None)
    } else {
        let robustness: SelectionRobustness =
            selection_robustness(&sub.candidates, effective_n_trials, cfg.trials_sr_std);
        (
            Some(robustness.median_dsr),
            Some(robustness.selection_gap),
        )
    };

    let (rolling_min_sharpe, rolling_frac_positive) =
        match rolling_sharpe(&all_returns, cfg.rolling_window) {
            Some(stats) => (Some(stats.min_sharpe), Some(stats.frac_positive)),
            None => (None, None),
        };

    let sortino = crate::stats::sortino_ratio(&all_returns, 0.0);
    let downside_deviation = crate::stats::downside_deviation(&all_returns, 0.0);

    // A process violation floors the realised return to zero while the raw
    // mean stays available for inspection.
    let process_floored = !process_ok;
    let realized_floored_return = if process_floored {
        0.0
    } else {
        raw_mean_return
    };

    // Every gate must hold for the agent to appear on the ranked board.
    let rank_eligible = [
        dsr >= cfg.dsr_bar,
        passed_k,
        process_ok,
        bootstrap_p < cfg.alpha,
        mandate_ok,
    ]
    .iter()
    .all(|&gate| gate);
    let composite = if rank_eligible { dsr } else { 0.0 };

    CompositeScore {
        agent_id: sub.agent_id.clone(),
        deflated_sharpe: dsr,
        psr,
        passed_k,
        process_ok,
        bootstrap_p,
        raw_mean_return,
        rank_eligible,
        composite,
        // Field-relative placeholders, overwritten by `rank`.
        alpha: 0.0,
        beta: 0.0,
        calibration_brier,
        edge_half_life: half_life,
        field_reality_check_p: 1.0,
        max_drawdown: drawdown,
        mandate_ok,
        turnover,
        pareto_optimal: false,
        step_down_significant: false,
        confidence_weighted_return,
        cost,
        return_per_cost,
        field_spa_p: 1.0,
        field_spa_consistent_p: 1.0,
        field_crowdedness: None,
        in_sample_trials: sub.in_sample_trials,
        effective_n_trials,
        dsr_percentile,
        selection_median_dsr,
        selection_gap,
        rank_ordinal: 0,
        rolling_min_sharpe,
        rolling_frac_positive,
        sortino,
        downside_deviation,
        dsr_per_cost,
        process_floored,
        realized_floored_return,
    }
}
/// Scores every submission, adds field-relative statistics and returns the
/// leaderboard, best first.
///
/// Steps:
/// 1. Pool each agent's returns and align all agents on the common prefix of
///    length `min_len` (the shortest pooled series).
/// 2. Build the "market" as the equal-weight average of the aligned series.
/// 3. Score each agent with `score_agent`; when `min_len >= 2`, attach alpha
///    and beta against the market.
/// 4. When `min_len >= 2`, compute the field-wide reality-check, SPA and
///    consistent-SPA p-values (identical for every agent) and the per-agent
///    step-down significance flags on market-excess returns.
/// 5. With at least two agents, compute each agent's crowdedness against its peers.
/// 6. Flag Pareto-optimal agents on (mean return, max drawdown, turnover).
/// 7. Sort: eligible agents first, then by the configured rank key
///    (descending), then by raw mean return (descending). The sort is stable,
///    so full ties keep submission order.
/// 8. Assign one-based `rank_ordinal` to eligible agents; ineligible keep `0`.
///
/// The returned vector has one entry per submission.
pub fn rank(subs: &[AgentSubmission], cfg: &ScoreConfig) -> Vec<CompositeScore> {
    let mut pooled: Vec<Vec<f64>> = subs
        .iter()
        .map(|s| {
            s.runs
                .iter()
                .flat_map(|r| r.returns.iter().copied())
                .collect()
        })
        .collect();
    let min_len = pooled.iter().map(Vec::len).min().unwrap_or(0);
    // Align every series on the common prefix once, so that every field-relative
    // statistic below (including alpha/beta) sees series of the same length as
    // `market`. `score_agent` pools on its own and is unaffected.
    for series in pooled.iter_mut() {
        series.truncate(min_len);
    }
    let n_agents = pooled.len().max(1) as f64;
    let market: Vec<f64> = (0..min_len)
        .map(|i| pooled.iter().map(|p| p[i]).sum::<f64>() / n_agents)
        .collect();

    let mut scores: Vec<CompositeScore> = subs
        .iter()
        .enumerate()
        .map(|(idx, s)| {
            let mut cs = score_agent(s, cfg);
            if min_len >= 2 {
                let (alpha, beta) = crate::attribution::alpha_beta(&pooled[idx], &market);
                cs.alpha = alpha;
                cs.beta = beta;
            }
            cs
        })
        .collect();

    if min_len >= 2 {
        // Excess of each agent over the field average, period by period.
        let field_excess: Vec<Vec<f64>> = pooled
            .iter()
            .map(|p| p.iter().zip(market.iter()).map(|(a, m)| a - m).collect())
            .collect();
        let rc_p = crate::significance::reality_check_pvalue(
            &field_excess,
            cfg.bootstrap_seed,
            cfg.n_boot,
            cfg.block_prob,
        );
        let spa_p = crate::significance::spa_pvalue(
            &field_excess,
            cfg.bootstrap_seed,
            cfg.n_boot,
            cfg.block_prob,
        );
        let spa_c_p = crate::significance::spa_consistent_pvalue(
            &field_excess,
            cfg.bootstrap_seed,
            cfg.n_boot,
            cfg.block_prob,
        );
        // These are properties of the whole field, so every agent gets the same value.
        for cs in scores.iter_mut() {
            cs.field_reality_check_p = rc_p;
            cs.field_spa_p = spa_p;
            cs.field_spa_consistent_p = spa_c_p;
        }
        let sd = crate::significance::step_down_significant(
            &field_excess,
            cfg.bootstrap_seed,
            cfg.n_boot,
            cfg.block_prob,
            cfg.alpha,
        );
        for (cs, s) in scores.iter_mut().zip(sd) {
            cs.step_down_significant = s;
        }
    }

    if min_len >= 2 && pooled.len() >= 2 {
        let aligned: Vec<&[f64]> = pooled.iter().map(Vec::as_slice).collect();
        for (idx, cs) in scores.iter_mut().enumerate() {
            let peers: Vec<&[f64]> = aligned
                .iter()
                .enumerate()
                .filter(|&(j, _)| j != idx)
                .map(|(_, &p)| p)
                .collect();
            cs.field_crowdedness = crate::correlation::crowdedness(aligned[idx], &peers).mean_corr;
        }
    }

    // Pareto flags are computed before sorting, while `scores` is still in
    // submission order; the flag travels with each score through the sort.
    let pareto: Vec<bool> = (0..scores.len())
        .map(|i| !(0..scores.len()).any(|j| j != i && dominates(&scores[j], &scores[i])))
        .collect();
    for (cs, p) in scores.iter_mut().zip(pareto) {
        cs.pareto_optimal = p;
    }

    let sort_key = |s: &CompositeScore| match cfg.rank_key {
        RankKey::DeflatedSharpe => s.composite,
        RankKey::Alpha => {
            if s.rank_eligible {
                s.alpha
            } else {
                f64::NEG_INFINITY
            }
        }
    };
    // Descending comparison that places NaN after every number. Mapping NaN
    // comparisons to `Equal` is not a total order (x == NaN and NaN == y while
    // x < y), which `sort_by` does not tolerate: the resulting order is
    // unspecified and newer toolchains may panic. For non-NaN inputs this is
    // exactly `y.partial_cmp(&x)`.
    let desc_nan_last = |x: f64, y: f64| -> std::cmp::Ordering {
        match (x.is_nan(), y.is_nan()) {
            (true, true) => std::cmp::Ordering::Equal,
            (true, false) => std::cmp::Ordering::Greater,
            (false, true) => std::cmp::Ordering::Less,
            (false, false) => y.partial_cmp(&x).unwrap_or(std::cmp::Ordering::Equal),
        }
    };
    scores.sort_by(|a, b| {
        b.rank_eligible
            .cmp(&a.rank_eligible)
            .then_with(|| desc_nan_last(sort_key(a), sort_key(b)))
            .then_with(|| desc_nan_last(a.raw_mean_return, b.raw_mean_return))
    });

    // Eligible agents sort first, so their ordinals are contiguous from 1.
    let mut ord = 0usize;
    for cs in scores.iter_mut() {
        if cs.rank_eligible {
            ord += 1;
            cs.rank_ordinal = ord;
        }
    }
    scores
}
// Unit tests for `score_agent` and `rank`, driven by deterministic synthetic
// return series.
#[cfg(test)]
mod tests {
use super::*;
use crate::process::ProcessEvent;
// Builds a run of `n` returns following `mean_ret + amp * sin(0.7 * i)`, with a
// default trace, no confidences or outcomes, and zero cost.
fn run(mean_ret: f64, amp: f64, n: usize) -> Run {
let returns = (0..n)
.map(|i| mean_ret + amp * (i as f64 * 0.7).sin())
.collect();
Run {
returns,
trace: Trace::default(),
confidences: Vec::new(),
outcomes: Vec::new(),
cost: 0.0,
}
}
// Wraps runs into a submission with no declared in-sample trials and no candidates.
fn agent(id: &str, runs: Vec<Run>) -> AgentSubmission {
AgentSubmission {
agent_id: id.to_string(),
runs,
in_sample_trials: 0,
candidates: Vec::new(),
}
}
// Five identical runs with a small, steady edge should clear every gate.
#[test]
fn skilled_is_eligible() {
let s = score_agent(
&agent("skilled", (0..5).map(|_| run(0.002, 0.0005, 60)).collect()),
&ScoreConfig::default(),
);
assert!(s.rank_eligible, "skilled should be eligible: {s:?}");
assert!(s.passed_k && s.process_ok);
}
// One strong run followed by four zero-mean runs must fail the all-runs PSR gate.
#[test]
fn lucky_high_return_fails_pass_k() {
let mut runs = vec![run(0.02, 0.002, 60)];
runs.extend((0..4).map(|_| run(0.0, 0.003, 60)));
let s = score_agent(&agent("lucky", runs), &ScoreConfig::default());
assert!(!s.passed_k, "lucky should fail pass^k");
assert!(!s.rank_eligible, "lucky must not be rank-eligible: {s:?}");
}
// An order placed with `risk_gate_passed: false` in any run makes the process
// unclean and the agent ineligible, regardless of returns.
#[test]
fn process_violator_is_disqualified() {
let mut runs: Vec<Run> = (0..5).map(|_| run(0.002, 0.0005, 60)).collect();
runs[0].trace.events.push(ProcessEvent::OrderPlaced {
risk_gate_passed: false,
});
let s = score_agent(&agent("violator", runs), &ScoreConfig::default());
assert!(!s.process_ok);
assert!(!s.rank_eligible, "a risk-gate bypass must disqualify");
}
// On the board, the consistent agent outranks the one with the higher raw
// mean return, because only the former is eligible.
#[test]
fn deflation_demotes_luck() {
let skilled = agent("skilled", (0..5).map(|_| run(0.002, 0.0005, 60)).collect());
let lucky = {
let mut runs = vec![run(0.02, 0.002, 60)];
runs.extend((0..4).map(|_| run(0.0, 0.003, 60)));
agent("lucky", runs)
};
let board = rank(&[lucky.clone(), skilled.clone()], &ScoreConfig::default());
let lucky_raw = board
.iter()
.find(|s| s.agent_id == "lucky")
.unwrap()
.raw_mean_return;
let skilled_raw = board
.iter()
.find(|s| s.agent_id == "skilled")
.unwrap()
.raw_mean_return;
assert!(
lucky_raw > skilled_raw,
"lucky raw {lucky_raw} should exceed skilled {skilled_raw}"
);
assert_eq!(board[0].agent_id, "skilled");
assert!(board[0].rank_eligible && !board[1].rank_eligible);
}
// High confidence on the winning run and low confidence on the losing run
// must lift the confidence-weighted return above the raw mean.
#[test]
fn confidence_weighting_rewards_conviction() {
let win = Run {
returns: vec![0.01; 30],
trace: Trace::default(),
confidences: vec![0.9; 30],
outcomes: Vec::new(),
cost: 0.0,
};
let lose = Run {
returns: vec![-0.005; 30],
trace: Trace::default(),
confidences: vec![0.1; 30],
outcomes: Vec::new(),
cost: 0.0,
};
let s = score_agent(&agent("conv", vec![win, lose]), &ScoreConfig::default());
assert!(
s.confidence_weighted_return > s.raw_mean_return,
"cwr {} should beat raw {}",
s.confidence_weighted_return,
s.raw_mean_return
);
}
// `return_per_cost` is `Some` only when the total cost is positive.
#[test]
fn cost_efficiency_reported_only_with_cost() {
let mut r = run(0.002, 0.0005, 30);
r.cost = 4.0;
let s = score_agent(&agent("paid", vec![r]), &ScoreConfig::default());
assert_eq!(s.cost, 4.0);
assert!(s.return_per_cost.is_some());
let free = score_agent(
&agent("free", vec![run(0.002, 0.0005, 30)]),
&ScoreConfig::default(),
);
assert!(free.return_per_cost.is_none());
}
// Declared in-sample trials add to the configured trial count (50 + 5000)
// and must never increase the deflated Sharpe ratio.
#[test]
fn in_sample_search_raises_the_deflation_bar() {
let runs: Vec<Run> = (0..5).map(|_| run(0.002, 0.0005, 60)).collect();
let base = score_agent(&agent("base", runs.clone()), &ScoreConfig::default());
let mut over = agent("over", runs);
over.in_sample_trials = 5000;
let s = score_agent(&over, &ScoreConfig::default());
assert_eq!(s.effective_n_trials, 5050);
assert!(
s.deflated_sharpe <= base.deflated_sharpe,
"more in-sample search must not raise DSR ({} vs {})",
s.deflated_sharpe,
base.deflated_sharpe
);
}
// `dsr_percentile` is `None` without a reference population and `Some` with one.
#[test]
fn percentile_reported_only_with_reference() {
let none = score_agent(
&agent("p", (0..5).map(|_| run(0.002, 0.0005, 60)).collect()),
&ScoreConfig::default(),
);
assert!(none.dsr_percentile.is_none());
let cfg = ScoreConfig {
reference_dsr_population: vec![0.0, 0.3, 0.6, 0.9],
..ScoreConfig::default()
};
let some = score_agent(
&agent("p", (0..5).map(|_| run(0.002, 0.0005, 60)).collect()),
&cfg,
);
assert!(some.dsr_percentile.is_some());
}
// With 300 pooled observations and the default window, rolling statistics are
// reported and every window has a positive Sharpe ratio.
#[test]
fn rolling_sharpe_reported_for_long_tracks() {
let s = score_agent(
&agent("roll", (0..5).map(|_| run(0.002, 0.0005, 60)).collect()),
&ScoreConfig::default(),
);
assert!(s.rolling_min_sharpe.is_some());
let fp = s.rolling_frac_positive.expect("reported");
assert!(
(fp - 1.0).abs() < 1e-12,
"steady edge → all windows positive"
);
}
// A 30-observation track cannot fill a 100-period window, so both rolling
// fields are `None`.
#[test]
fn rolling_sharpe_none_when_track_too_short() {
let cfg = ScoreConfig {
rolling_window: 100,
..ScoreConfig::default()
};
let s = score_agent(&agent("short", vec![run(0.002, 0.0005, 30)]), &cfg);
assert!(s.rolling_min_sharpe.is_none());
assert!(s.rolling_frac_positive.is_none());
}
// `dsr_per_cost` equals DSR divided by total cost, and is `None` at zero cost.
#[test]
fn dsr_per_cost_reported_only_with_cost() {
let mut r = run(0.002, 0.0005, 60);
r.cost = 5.0;
let paid = score_agent(&agent("paid", vec![r]), &ScoreConfig::default());
let dpc = paid.dsr_per_cost.expect("reported with cost");
assert!((dpc - paid.deflated_sharpe / 5.0).abs() < 1e-12);
let free = score_agent(
&agent("free", vec![run(0.002, 0.0005, 60)]),
&ScoreConfig::default(),
);
assert!(free.dsr_per_cost.is_none());
}
// A process violation sets `process_floored` and zeroes the realised return,
// while the raw mean return is still reported unchanged.
#[test]
fn process_violation_floors_realized_return() {
let mut runs: Vec<Run> = (0..5).map(|_| run(0.02, 0.0005, 60)).collect();
runs[0].trace.events.push(ProcessEvent::OrderPlaced {
risk_gate_passed: false,
});
let s = score_agent(&agent("cheater", runs), &ScoreConfig::default());
assert!(s.process_floored, "block violation must set the floor flag");
assert_eq!(
s.realized_floored_return, 0.0,
"floored to no-skill baseline"
);
assert!(
s.raw_mean_return > 0.0,
"raw return is preserved un-floored"
);
assert!(!s.rank_eligible, "eligibility logic intact");
}
// Without violations the realised return equals the raw mean return.
#[test]
fn clean_process_is_not_floored() {
let s = score_agent(
&agent("clean", (0..5).map(|_| run(0.002, 0.0005, 60)).collect()),
&ScoreConfig::default(),
);
assert!(!s.process_floored);
assert_eq!(s.realized_floored_return, s.raw_mean_return);
}
// Ordinals start at 1 for the best eligible agent; ineligible agents keep 0.
#[test]
fn rank_ordinal_is_one_based_among_eligible() {
let skilled = agent("skilled", (0..5).map(|_| run(0.002, 0.0005, 60)).collect());
let lucky = {
let mut runs = vec![run(0.02, 0.002, 60)];
runs.extend((0..4).map(|_| run(0.0, 0.003, 60)));
agent("lucky", runs)
};
let board = rank(&[lucky, skilled], &ScoreConfig::default());
assert_eq!(board[0].rank_ordinal, 1, "leader is ordinal 1");
assert_eq!(board[1].rank_ordinal, 0, "ineligible gets ordinal 0");
}
}