//! multi_skill/metrics.rs — performance metrics for evaluating rating systems.

1extern crate overload;
2
3use crate::systems::{get_participant_ratings, outcome_free, PlayersByName, Rating};
4use overload::overload;
5use std::fmt;
6use std::ops;
7
// One entry per contest participant: (rating, lo, hi), where lo and hi appear
// to be the 0-indexed bounds of the player's tie range in the standings
// (see `top_k` and `percentile_distance_metric`) — TODO confirm against callers.
pub type ParticipantRatings = [(Rating, usize, usize)];
// Accumulator for a weighted average: (total weight, weighted sum).
pub type WeightAndSum = (f64, f64);
// A boxed metric function mapping standings to a scalar score.
pub type Metric = Box<dyn Fn(&ParticipantRatings) -> f64>;
11
/// A data structure for storing the various performance metrics we want to analyze.
pub struct PerformanceReport {
    // One (weight, sum) accumulator per metric; the reported value is sum / weight.
    pub metrics_wt_sum: Vec<WeightAndSum>,
}
16
17impl fmt::Display for PerformanceReport {
18    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
19        let averaged: Vec<f64> = self
20            .metrics_wt_sum
21            .iter()
22            .map(|&(wt, sum)| sum / wt)
23            .collect();
24        write!(f, "{:?})", averaged)
25    }
26}
27
28impl PerformanceReport {
29    pub fn new(num_metrics: usize) -> Self {
30        Self {
31            metrics_wt_sum: vec![(0., 0.); num_metrics],
32        }
33    }
34}
35
// Element-wise `+` on reports, accepting any mix of owned/borrowed operands.
overload!((a: ?PerformanceReport) + (b: ?PerformanceReport) -> PerformanceReport {
    assert_eq!(a.metrics_wt_sum.len(), b.metrics_wt_sum.len());
    // Pair up corresponding (weight, sum) entries and add them component-wise.
    let metrics_wt_sum = a.metrics_wt_sum.iter().zip(b.metrics_wt_sum.iter()).map(|((a_w, a_sum), (b_w, b_sum))| (a_w+b_w, a_sum+b_sum)).collect();
    PerformanceReport {
        metrics_wt_sum
    }
});
43
// In-place element-wise `+=`, used to accumulate one report into another.
overload!((a: &mut PerformanceReport) += (b: ?PerformanceReport) {
    assert_eq!(a.metrics_wt_sum.len(), b.metrics_wt_sum.len());
    for ((a_w, a_sum), (b_w, b_sum)) in a.metrics_wt_sum.iter_mut().zip(b.metrics_wt_sum.iter()) {
        *a_w += b_w;
        *a_sum += b_sum;
    }
});
51
52// Returns only the players whose 0-indexed rank is less than k
53// May return more than k players if there are ties
54pub fn top_k(standings: &ParticipantRatings, k: usize) -> &ParticipantRatings {
55    let idx_first_ge_k = standings
56        .binary_search_by(|&(_, lo, _)| lo.cmp(&k).then(std::cmp::Ordering::Greater))
57        .unwrap_err();
58    &standings[0..idx_first_ge_k]
59}
60
61pub fn pairwise_metric(standings: &ParticipantRatings) -> WeightAndSum {
62    if outcome_free(standings) {
63        return (0., 0.);
64    }
65    // Compute topk (frac. of inverted pairs) metric
66    let mut correct_pairs = 0.;
67    let mut total_pairs = 0.;
68    for &(loser_rating, loser_lo, _) in standings {
69        for &(winner_rating, winner_lo, _) in standings {
70            if winner_lo >= loser_lo as usize {
71                break;
72            }
73            if winner_rating.mu > loser_rating.mu {
74                correct_pairs += 2.;
75            }
76            total_pairs += 2.;
77        }
78    }
79
80    let n = standings.len() as f64;
81    let tied_pairs = n * (n - 1.) - total_pairs;
82    (n, 100. * (correct_pairs + tied_pairs) / (n - 1.))
83}
84
85pub fn percentile_distance_metric(standings: &ParticipantRatings) -> WeightAndSum {
86    if outcome_free(standings) {
87        return (0., 0.);
88    }
89    // Compute avg percentile distance metric
90    let mut standings_by_rating = Vec::from(standings);
91    standings_by_rating.sort_by(|a, b| b.0.mu.partial_cmp(&a.0.mu).unwrap());
92
93    let mut sum_error = 0.;
94    for (i, &(_, lo, hi)) in standings_by_rating.iter().enumerate() {
95        let closest_to_i = i.max(lo).min(hi);
96        sum_error += (i as f64 - closest_to_i as f64).abs();
97    }
98
99    let n = standings.len() as f64;
100    (n, 100. * sum_error / (n - 1.))
101}
102
103pub fn cross_entropy_metric(standings: &ParticipantRatings, scale: f64) -> WeightAndSum {
104    if outcome_free(standings) {
105        return (0., 0.);
106    }
107    // Compute base 2 cross-entropy from the logistic Elo formula
108    // The default value of scale reported in the paper is 400,
109    // all others can be seen as applying to a scaled version of the ratings
110    let mut sum_ce = 0.;
111    for &(loser_rating, loser_lo, _) in standings {
112        for &(winner_rating, winner_lo, _) in standings {
113            if winner_lo >= loser_lo as usize {
114                break;
115            }
116            let rating_diff = loser_rating.mu - winner_rating.mu;
117            let inv_prob = 1. + 10f64.powf(rating_diff / scale);
118            sum_ce += inv_prob.log2();
119        }
120    }
121
122    let n = standings.len() as f64;
123    (n, 2. * sum_ce / (n - 1.))
124}
125
126// Meant to be modified manually to contain the desired metrics
127pub fn compute_metrics_custom(
128    players: &mut PlayersByName,
129    contest_standings: &[(String, usize, usize)],
130) -> PerformanceReport {
131    let everyone = get_participant_ratings(players, contest_standings, 0);
132    let experienced = get_participant_ratings(players, contest_standings, 5);
133    let top100 = top_k(&everyone, 100);
134
135    let mut metrics_wt_sum = vec![
136        pairwise_metric(&everyone),
137        pairwise_metric(&experienced),
138        pairwise_metric(top100),
139        percentile_distance_metric(&everyone),
140        percentile_distance_metric(&experienced),
141        percentile_distance_metric(top100),
142    ];
143    for scale in (200..=600).step_by(50) {
144        // In post-processing, only the best of these values should be kept, along with its scale
145        metrics_wt_sum.push(cross_entropy_metric(&experienced, scale as f64));
146    }
147
148    PerformanceReport { metrics_wt_sum }
149}