1use serde::{Deserialize, Serialize};
14
15use crate::calibration::brier_score;
16use crate::decay::edge_half_life;
17use crate::deflated_sharpe::{deflated_sharpe_ratio, probabilistic_sharpe_ratio};
18use crate::pass_k::{pass_k, PassMode};
19use crate::percentile::percentile_of;
20use crate::process::{process_score, ProcessEvent, Trace};
21use crate::rolling::rolling_sharpe;
22use crate::selection::{selection_robustness, SelectionRobustness};
23use crate::significance::bootstrap_pvalue;
24use crate::stats::mean;
25
26#[derive(Clone, Debug, Serialize, Deserialize)]
29pub struct Run {
30 pub returns: Vec<f64>,
31 #[serde(default)]
32 pub trace: Trace,
33 #[serde(default)]
34 pub confidences: Vec<f64>,
35 #[serde(default)]
36 pub outcomes: Vec<bool>,
37 #[serde(default)]
40 pub cost: f64,
41}
42
43#[derive(Clone, Debug, Serialize, Deserialize)]
45pub struct AgentSubmission {
46 pub agent_id: String,
47 pub runs: Vec<Run>,
48 #[serde(default)]
52 pub in_sample_trials: u32,
53 #[serde(default)]
57 pub candidates: Vec<Vec<f64>>,
58}
59
60#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Default)]
62#[serde(rename_all = "snake_case")]
63pub enum RankKey {
64 #[default]
66 DeflatedSharpe,
67 Alpha,
69}
70
71#[derive(Clone, Debug, Serialize, Deserialize)]
73pub struct Mandate {
74 pub max_drawdown: f64,
76}
77
78impl Default for Mandate {
79 fn default() -> Self {
80 Self { max_drawdown: 1.0 }
81 }
82}
83
/// Largest peak-to-trough decline of the compounded value path, expressed as a
/// fraction of the running peak.
///
/// The path starts at 1.0 and is multiplied by `1 + r` for every return `r`.
/// Returns 0.0 for an empty slice or for a path that never falls below its
/// running high.
fn max_drawdown(returns: &[f64]) -> f64 {
    // State carried through the fold: (current value, running high, worst decline).
    let (_, _, worst) = returns.iter().fold(
        (1.0_f64, 1.0_f64, 0.0_f64),
        |(value, high, worst), &ret| {
            let value = value * (1.0 + ret);
            let high = if value > high { value } else { high };
            // `high` starts at 1.0 and only ever grows, so the division is safe.
            let decline = 1.0 - value / high;
            let worst = if decline > worst { decline } else { worst };
            (value, high, worst)
        },
    );
    worst
}
103
104#[derive(Clone, Debug, Serialize, Deserialize)]
107pub struct ScoreConfig {
108 pub n_trials: u32,
109 pub trials_sr_std: f64,
110 pub dsr_bar: f64,
112 pub per_run_psr_bar: f64,
114 pub alpha: f64,
116 pub bootstrap_seed: u64,
117 pub n_boot: usize,
118 pub block_prob: f64,
119 #[serde(default)]
121 pub mandate: Mandate,
122 #[serde(default)]
124 pub rank_key: RankKey,
125 #[serde(default)]
128 pub reference_dsr_population: Vec<f64>,
129 #[serde(default = "default_rolling_window")]
132 pub rolling_window: usize,
133}
134
/// Serde default for `ScoreConfig::rolling_window`.
fn default_rolling_window() -> usize {
    const DEFAULT_WINDOW: usize = 21;
    DEFAULT_WINDOW
}
139
140impl Default for ScoreConfig {
141 fn default() -> Self {
142 Self {
143 n_trials: 50,
144 trials_sr_std: 0.5,
145 dsr_bar: 0.95,
146 per_run_psr_bar: 0.90,
147 alpha: 0.05,
148 bootstrap_seed: 0x5BA7_2026,
149 n_boot: 2000,
150 block_prob: 0.1,
151 mandate: Mandate::default(),
152 rank_key: RankKey::default(),
153 reference_dsr_population: Vec::new(),
154 rolling_window: default_rolling_window(),
155 }
156 }
157}
158
159#[derive(Clone, Debug, Serialize, Deserialize)]
161pub struct CompositeScore {
162 pub agent_id: String,
163 pub deflated_sharpe: f64,
164 pub psr: f64,
165 pub passed_k: bool,
166 pub process_ok: bool,
167 pub bootstrap_p: f64,
168 pub raw_mean_return: f64,
169 pub rank_eligible: bool,
170 pub composite: f64,
172 pub alpha: f64,
175 pub beta: f64,
176 pub calibration_brier: Option<f64>,
179 pub edge_half_life: Option<f64>,
182 pub field_reality_check_p: f64,
186 pub max_drawdown: f64,
188 pub mandate_ok: bool,
190 pub turnover: f64,
192 pub pareto_optimal: bool,
195 pub step_down_significant: bool,
198 pub confidence_weighted_return: f64,
202 pub cost: f64,
204 pub return_per_cost: Option<f64>,
206 pub field_spa_p: f64,
210 pub field_spa_consistent_p: f64,
214 pub field_crowdedness: Option<f64>,
220 pub in_sample_trials: u32,
222 pub effective_n_trials: u32,
225 pub dsr_percentile: Option<f64>,
228 pub selection_median_dsr: Option<f64>,
230 pub selection_gap: Option<f64>,
233 pub rank_ordinal: usize,
236 pub rolling_min_sharpe: Option<f64>,
240 pub rolling_frac_positive: Option<f64>,
244 pub sortino: Option<f64>,
248 pub downside_deviation: f64,
251 pub dsr_per_cost: Option<f64>,
254 pub process_floored: bool,
257 pub realized_floored_return: f64,
261}
262
263fn dominates(a: &CompositeScore, b: &CompositeScore) -> bool {
265 a.raw_mean_return >= b.raw_mean_return
266 && a.max_drawdown <= b.max_drawdown
267 && a.turnover <= b.turnover
268 && (a.raw_mean_return > b.raw_mean_return
269 || a.max_drawdown < b.max_drawdown
270 || a.turnover < b.turnover)
271}
272
273pub fn score_agent(sub: &AgentSubmission, cfg: &ScoreConfig) -> CompositeScore {
275 let pooled: Vec<f64> = sub
276 .runs
277 .iter()
278 .flat_map(|r| r.returns.iter().copied())
279 .collect();
280
281 let psr = probabilistic_sharpe_ratio(&pooled, 0.0);
282 let effective_n_trials = cfg.n_trials.saturating_add(sub.in_sample_trials);
286 let dsr = deflated_sharpe_ratio(&pooled, effective_n_trials, cfg.trials_sr_std);
287
288 let per_run: Vec<bool> = sub
290 .runs
291 .iter()
292 .map(|r| probabilistic_sharpe_ratio(&r.returns, 0.0) >= cfg.per_run_psr_bar)
293 .collect();
294 let passed_k = pass_k(&per_run, PassMode::All);
295
296 let process_ok = sub.runs.iter().all(|r| process_score(&r.trace).is_clean());
298
299 let bootstrap_p = bootstrap_pvalue(&pooled, cfg.bootstrap_seed, cfg.n_boot, cfg.block_prob);
300 let raw_mean_return = mean(&pooled);
301
302 let conf: Vec<f64> = sub
304 .runs
305 .iter()
306 .flat_map(|r| r.confidences.iter().copied())
307 .collect();
308 let outc: Vec<bool> = sub
309 .runs
310 .iter()
311 .flat_map(|r| r.outcomes.iter().copied())
312 .collect();
313 let calibration_brier = if !conf.is_empty() && !outc.is_empty() {
314 Some(brier_score(&conf, &outc))
315 } else {
316 None
317 };
318
319 let per_run_edge: Vec<f64> = sub.runs.iter().map(|r| mean(&r.returns)).collect();
321 let edge_half_life_periods = edge_half_life(&per_run_edge);
322
323 let mdd = max_drawdown(&pooled);
325 let mandate_ok = mdd <= cfg.mandate.max_drawdown;
326
327 let total_orders: usize = sub
329 .runs
330 .iter()
331 .map(|r| {
332 r.trace
333 .events
334 .iter()
335 .filter(|e| matches!(e, ProcessEvent::OrderPlaced { .. }))
336 .count()
337 })
338 .sum();
339 let turnover = total_orders as f64 / sub.runs.len().max(1) as f64;
340
341 let mut cw_num = 0.0;
344 let mut cw_den = 0.0;
345 for r in &sub.runs {
346 let w = if r.confidences.is_empty() {
347 1.0
348 } else {
349 mean(&r.confidences)
350 };
351 cw_num += w * mean(&r.returns);
352 cw_den += w;
353 }
354 let confidence_weighted_return = if cw_den > 0.0 {
355 cw_num / cw_den
356 } else {
357 raw_mean_return
358 };
359
360 let cost: f64 = sub.runs.iter().map(|r| r.cost).sum();
362 let return_per_cost = if cost > 0.0 {
363 Some(raw_mean_return / cost)
364 } else {
365 None
366 };
367
368 let dsr_percentile = if cfg.reference_dsr_population.is_empty() {
371 None
372 } else {
373 Some(percentile_of(dsr, &cfg.reference_dsr_population))
374 };
375
376 let (selection_median_dsr, selection_gap) = if sub.candidates.is_empty() {
380 (None, None)
381 } else {
382 let sr: SelectionRobustness =
383 selection_robustness(&sub.candidates, effective_n_trials, cfg.trials_sr_std);
384 (Some(sr.median_dsr), Some(sr.selection_gap))
385 };
386
387 let rolling = rolling_sharpe(&pooled, cfg.rolling_window);
390 let rolling_min_sharpe = rolling.map(|r| r.min_sharpe);
391 let rolling_frac_positive = rolling.map(|r| r.frac_positive);
392
393 let sortino = crate::stats::sortino_ratio(&pooled, 0.0);
396 let downside_deviation = crate::stats::downside_deviation(&pooled, 0.0);
397
398 let dsr_per_cost = if cost > 0.0 { Some(dsr / cost) } else { None };
400
401 let process_floored = !process_ok;
406 let realized_floored_return = if process_floored {
407 0.0
408 } else {
409 raw_mean_return
410 };
411
412 let rank_eligible =
413 dsr >= cfg.dsr_bar && passed_k && process_ok && bootstrap_p < cfg.alpha && mandate_ok;
414 let composite = if rank_eligible { dsr } else { 0.0 };
415
416 CompositeScore {
417 agent_id: sub.agent_id.clone(),
418 deflated_sharpe: dsr,
419 psr,
420 passed_k,
421 process_ok,
422 bootstrap_p,
423 raw_mean_return,
424 rank_eligible,
425 composite,
426 alpha: 0.0,
427 beta: 0.0,
428 calibration_brier,
429 edge_half_life: edge_half_life_periods,
430 field_reality_check_p: 1.0,
431 max_drawdown: mdd,
432 mandate_ok,
433 turnover,
434 pareto_optimal: false,
435 step_down_significant: false,
436 confidence_weighted_return,
437 cost,
438 return_per_cost,
439 field_spa_p: 1.0,
440 field_spa_consistent_p: 1.0,
441 field_crowdedness: None,
442 in_sample_trials: sub.in_sample_trials,
443 effective_n_trials,
444 dsr_percentile,
445 selection_median_dsr,
446 selection_gap,
447 rank_ordinal: 0,
448 rolling_min_sharpe,
449 rolling_frac_positive,
450 sortino,
451 downside_deviation,
452 dsr_per_cost,
453 process_floored,
454 realized_floored_return,
455 }
456}
457
458pub fn rank(subs: &[AgentSubmission], cfg: &ScoreConfig) -> Vec<CompositeScore> {
490 let pooled: Vec<Vec<f64>> = subs
493 .iter()
494 .map(|s| {
495 s.runs
496 .iter()
497 .flat_map(|r| r.returns.iter().copied())
498 .collect()
499 })
500 .collect();
501 let min_len = pooled.iter().map(Vec::len).min().unwrap_or(0);
502 let n_agents = pooled.len().max(1) as f64;
503 let market: Vec<f64> = (0..min_len)
504 .map(|i| pooled.iter().map(|p| p[i]).sum::<f64>() / n_agents)
505 .collect();
506
507 let mut scores: Vec<CompositeScore> = subs
508 .iter()
509 .enumerate()
510 .map(|(idx, s)| {
511 let mut cs = score_agent(s, cfg);
512 if min_len >= 2 {
513 let (alpha, beta) = crate::attribution::alpha_beta(&pooled[idx], &market);
514 cs.alpha = alpha;
515 cs.beta = beta;
516 }
517 cs
518 })
519 .collect();
520
521 if min_len >= 2 {
524 let field_excess: Vec<Vec<f64>> = pooled
525 .iter()
526 .map(|p| {
527 p.iter()
528 .take(min_len)
529 .zip(market.iter())
530 .map(|(a, m)| a - m)
531 .collect()
532 })
533 .collect();
534 let rc_p = crate::significance::reality_check_pvalue(
535 &field_excess,
536 cfg.bootstrap_seed,
537 cfg.n_boot,
538 cfg.block_prob,
539 );
540 let spa_p = crate::significance::spa_pvalue(
541 &field_excess,
542 cfg.bootstrap_seed,
543 cfg.n_boot,
544 cfg.block_prob,
545 );
546 let spa_c_p = crate::significance::spa_consistent_pvalue(
547 &field_excess,
548 cfg.bootstrap_seed,
549 cfg.n_boot,
550 cfg.block_prob,
551 );
552 for cs in scores.iter_mut() {
553 cs.field_reality_check_p = rc_p;
554 cs.field_spa_p = spa_p;
555 cs.field_spa_consistent_p = spa_c_p;
556 }
557 let sd = crate::significance::step_down_significant(
558 &field_excess,
559 cfg.bootstrap_seed,
560 cfg.n_boot,
561 cfg.block_prob,
562 cfg.alpha,
563 );
564 for (cs, s) in scores.iter_mut().zip(sd) {
565 cs.step_down_significant = s;
566 }
567 }
568
569 if min_len >= 2 && pooled.len() >= 2 {
574 let aligned: Vec<&[f64]> = pooled.iter().map(|p| &p[..min_len]).collect();
575 for (idx, cs) in scores.iter_mut().enumerate() {
576 let peers: Vec<&[f64]> = aligned
577 .iter()
578 .enumerate()
579 .filter(|&(j, _)| j != idx)
580 .map(|(_, &p)| p)
581 .collect();
582 cs.field_crowdedness = crate::correlation::crowdedness(aligned[idx], &peers).mean_corr;
583 }
584 }
585
586 let pareto: Vec<bool> = (0..scores.len())
588 .map(|i| !(0..scores.len()).any(|j| j != i && dominates(&scores[j], &scores[i])))
589 .collect();
590 for (cs, p) in scores.iter_mut().zip(pareto) {
591 cs.pareto_optimal = p;
592 }
593
594 let sort_key = |s: &CompositeScore| match cfg.rank_key {
595 RankKey::DeflatedSharpe => s.composite,
596 RankKey::Alpha => {
597 if s.rank_eligible {
598 s.alpha
599 } else {
600 f64::NEG_INFINITY
601 }
602 }
603 };
604 scores.sort_by(|a, b| {
605 b.rank_eligible
606 .cmp(&a.rank_eligible)
607 .then(
608 sort_key(b)
609 .partial_cmp(&sort_key(a))
610 .unwrap_or(std::cmp::Ordering::Equal),
611 )
612 .then(
613 b.raw_mean_return
614 .partial_cmp(&a.raw_mean_return)
615 .unwrap_or(std::cmp::Ordering::Equal),
616 )
617 });
618
619 let mut ord = 0usize;
622 for cs in scores.iter_mut() {
623 if cs.rank_eligible {
624 ord += 1;
625 cs.rank_ordinal = ord;
626 }
627 }
628 scores
629}
630
#[cfg(test)]
mod tests {
    use super::*;
    use crate::process::ProcessEvent;

    /// Builds a run of `n` returns: `mean_ret` plus a sine ripple of amplitude
    /// `amp`, with a clean trace, no confidences/outcomes and zero cost.
    fn run(mean_ret: f64, amp: f64, n: usize) -> Run {
        let mut returns = Vec::with_capacity(n);
        for i in 0..n {
            returns.push(mean_ret + amp * (i as f64 * 0.7).sin());
        }
        Run {
            returns,
            trace: Trace::default(),
            confidences: vec![],
            outcomes: vec![],
            cost: 0.0,
        }
    }

    /// Wraps runs into a submission with no declared in-sample search or candidates.
    fn agent(id: &str, runs: Vec<Run>) -> AgentSubmission {
        AgentSubmission {
            agent_id: id.to_string(),
            runs,
            in_sample_trials: 0,
            candidates: vec![],
        }
    }

    /// Five identical 60-period runs with a small ripple around `mean_ret`.
    fn steady_runs(mean_ret: f64) -> Vec<Run> {
        vec![run(mean_ret, 0.0005, 60); 5]
    }

    /// One outsized run followed by four zero-mean runs.
    fn lucky_runs() -> Vec<Run> {
        let mut runs = vec![run(0.02, 0.002, 60)];
        runs.extend(vec![run(0.0, 0.003, 60); 4]);
        runs
    }

    /// Records an order that was placed without passing the risk gate.
    fn bypass_risk_gate(target: &mut Run) {
        target.trace.events.push(ProcessEvent::OrderPlaced {
            risk_gate_passed: false,
        });
    }

    /// Scores a submission under the default configuration.
    fn score(sub: AgentSubmission) -> CompositeScore {
        score_agent(&sub, &ScoreConfig::default())
    }

    #[test]
    fn skilled_is_eligible() {
        let s = score(agent("skilled", steady_runs(0.002)));
        assert!(s.rank_eligible, "skilled should be eligible: {s:?}");
        assert!(s.passed_k && s.process_ok);
    }

    #[test]
    fn lucky_high_return_fails_pass_k() {
        let s = score(agent("lucky", lucky_runs()));
        assert!(!s.passed_k, "lucky should fail pass^k");
        assert!(!s.rank_eligible, "lucky must not be rank-eligible: {s:?}");
    }

    #[test]
    fn process_violator_is_disqualified() {
        let mut runs = steady_runs(0.002);
        bypass_risk_gate(&mut runs[0]);
        let s = score(agent("violator", runs));
        assert!(!s.process_ok);
        assert!(!s.rank_eligible, "a risk-gate bypass must disqualify");
    }

    #[test]
    fn deflation_demotes_luck() {
        let field = [
            agent("lucky", lucky_runs()),
            agent("skilled", steady_runs(0.002)),
        ];
        let board = rank(&field, &ScoreConfig::default());

        let raw_of = |id: &str| {
            board
                .iter()
                .find(|s| s.agent_id == id)
                .unwrap()
                .raw_mean_return
        };
        let lucky_raw = raw_of("lucky");
        let skilled_raw = raw_of("skilled");
        assert!(
            lucky_raw > skilled_raw,
            "lucky raw {lucky_raw} should exceed skilled {skilled_raw}"
        );

        // Despite the lower raw return, the consistent agent leads the board.
        assert_eq!(board[0].agent_id, "skilled");
        assert!(board[0].rank_eligible && !board[1].rank_eligible);
    }

    #[test]
    fn confidence_weighting_rewards_conviction() {
        // A 30-period run with a constant return and a constant confidence.
        let flat = |ret: f64, conf: f64| Run {
            returns: vec![ret; 30],
            trace: Trace::default(),
            confidences: vec![conf; 30],
            outcomes: Vec::new(),
            cost: 0.0,
        };
        let win = flat(0.01, 0.9);
        let lose = flat(-0.005, 0.1);
        let s = score(agent("conv", vec![win, lose]));
        assert!(
            s.confidence_weighted_return > s.raw_mean_return,
            "cwr {} should beat raw {}",
            s.confidence_weighted_return,
            s.raw_mean_return
        );
    }

    #[test]
    fn cost_efficiency_reported_only_with_cost() {
        let paid_run = Run {
            cost: 4.0,
            ..run(0.002, 0.0005, 30)
        };
        let s = score(agent("paid", vec![paid_run]));
        assert_eq!(s.cost, 4.0);
        assert!(s.return_per_cost.is_some());

        let free = score(agent("free", vec![run(0.002, 0.0005, 30)]));
        assert!(free.return_per_cost.is_none());
    }

    #[test]
    fn in_sample_search_raises_the_deflation_bar() {
        let base = score(agent("base", steady_runs(0.002)));
        let over = AgentSubmission {
            in_sample_trials: 5000,
            ..agent("over", steady_runs(0.002))
        };
        let s = score(over);
        assert_eq!(s.effective_n_trials, 5050);
        assert!(
            s.deflated_sharpe <= base.deflated_sharpe,
            "more in-sample search must not raise DSR ({} vs {})",
            s.deflated_sharpe,
            base.deflated_sharpe
        );
    }

    #[test]
    fn percentile_reported_only_with_reference() {
        let none = score(agent("p", steady_runs(0.002)));
        assert!(none.dsr_percentile.is_none());

        let cfg = ScoreConfig {
            reference_dsr_population: vec![0.0, 0.3, 0.6, 0.9],
            ..ScoreConfig::default()
        };
        let some = score_agent(&agent("p", steady_runs(0.002)), &cfg);
        assert!(some.dsr_percentile.is_some());
    }

    #[test]
    fn rolling_sharpe_reported_for_long_tracks() {
        let s = score(agent("roll", steady_runs(0.002)));
        assert!(s.rolling_min_sharpe.is_some());
        let fp = s.rolling_frac_positive.expect("reported");
        assert!(
            (fp - 1.0).abs() < 1e-12,
            "steady edge → all windows positive"
        );
    }

    #[test]
    fn rolling_sharpe_none_when_track_too_short() {
        let cfg = ScoreConfig {
            rolling_window: 100,
            ..ScoreConfig::default()
        };
        let s = score_agent(&agent("short", vec![run(0.002, 0.0005, 30)]), &cfg);
        assert!(s.rolling_min_sharpe.is_none());
        assert!(s.rolling_frac_positive.is_none());
    }

    #[test]
    fn dsr_per_cost_reported_only_with_cost() {
        let paid_run = Run {
            cost: 5.0,
            ..run(0.002, 0.0005, 60)
        };
        let paid = score(agent("paid", vec![paid_run]));
        let dpc = paid.dsr_per_cost.expect("reported with cost");
        assert!((dpc - paid.deflated_sharpe / 5.0).abs() < 1e-12);

        let free = score(agent("free", vec![run(0.002, 0.0005, 60)]));
        assert!(free.dsr_per_cost.is_none());
    }

    #[test]
    fn process_violation_floors_realized_return() {
        let mut runs = steady_runs(0.02);
        bypass_risk_gate(&mut runs[0]);
        let s = score(agent("cheater", runs));
        assert!(s.process_floored, "block violation must set the floor flag");
        assert_eq!(
            s.realized_floored_return, 0.0,
            "floored to no-skill baseline"
        );
        assert!(
            s.raw_mean_return > 0.0,
            "raw return is preserved un-floored"
        );
        assert!(!s.rank_eligible, "eligibility logic intact");
    }

    #[test]
    fn clean_process_is_not_floored() {
        let s = score(agent("clean", steady_runs(0.002)));
        assert!(!s.process_floored);
        assert_eq!(s.realized_floored_return, s.raw_mean_return);
    }

    #[test]
    fn rank_ordinal_is_one_based_among_eligible() {
        let field = [
            agent("lucky", lucky_runs()),
            agent("skilled", steady_runs(0.002)),
        ];
        let board = rank(&field, &ScoreConfig::default());
        assert_eq!(board[0].rank_ordinal, 1, "leader is ordinal 1");
        assert_eq!(board[1].rank_ordinal, 0, "ineligible gets ordinal 0");
    }
}