Skip to main content

cp_validator/
rating.rs

1//! OpenSkill Bayesian rating implementation.
2//!
3//! Implements the Thurstone-Mosteller model used by OpenSkill for pairwise
4//! skill estimation. Each player (contributor or peer) has a mean skill
5//! estimate (mu) and an uncertainty (sigma). Ratings update through pairwise
6//! comparisons using truncated Gaussian updates.
7//!
8//! Per CP-015 section 8: the core algorithm follows Weng and Lin (2011),
9//! adapted from the TrueSkill framework.
10
11use serde::{Deserialize, Serialize};
12use std::f64::consts::{FRAC_1_SQRT_2, PI};
13
14/// Performance variance parameter. Represents the standard deviation of
15/// performance around true skill. Set to default_mu / 6 following the
16/// TrueSkill convention.
17const BETA: f64 = 25.0 / 6.0;
18
19/// Minimum sigma floor to prevent sigma from collapsing to zero.
20const KAPPA: f64 = 0.0001;
21
22/// Default mean skill for a new player.
23pub const DEFAULT_MU: f64 = 25.0;
24
25/// Default uncertainty for a new player (mu / 3).
26pub const DEFAULT_SIGMA: f64 = 25.0 / 3.0; // 8.333...
27
28/// An OpenSkill Bayesian rating with mean skill estimate and uncertainty.
29#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
30pub struct Rating {
31    /// Mean skill estimate.
32    pub mu: f64,
33    /// Uncertainty (standard deviation of the skill belief).
34    pub sigma: f64,
35}
36
37impl Rating {
38    /// Create a new rating with the given mu and sigma.
39    pub fn new(mu: f64, sigma: f64) -> Self {
40        Self { mu, sigma }
41    }
42
43    /// Conservative skill estimate: mu - 2*sigma.
44    ///
45    /// This is the lower bound of a ~95% confidence interval for the
46    /// player's true skill. It penalizes uncertainty, meaning players
47    /// with few observations are ranked lower than confidently-good players.
48    pub fn conservative_score(&self) -> f64 {
49        self.mu - 2.0 * self.sigma
50    }
51}
52
53impl Default for Rating {
54    fn default() -> Self {
55        Self {
56            mu: DEFAULT_MU,
57            sigma: DEFAULT_SIGMA,
58        }
59    }
60}
61
62/// Standard normal probability density function.
63fn normal_pdf(x: f64) -> f64 {
64    (-0.5 * x * x).exp() / (2.0 * PI).sqrt()
65}
66
67/// Standard normal cumulative distribution function.
68///
69/// Uses an approximation based on the complementary error function.
70/// Accurate to about 1.5e-7 relative error.
71fn normal_cdf(x: f64) -> f64 {
72    0.5 * erfc(-x * FRAC_1_SQRT_2)
73}
74
75/// Complementary error function approximation.
76///
77/// Uses the rational approximation from Abramowitz and Stegun (1964),
78/// formula 7.1.26, which provides accuracy to about 1.5e-7.
79fn erfc(x: f64) -> f64 {
80    // For negative x, use erfc(-x) = 2 - erfc(x)
81    if x < 0.0 {
82        return 2.0 - erfc(-x);
83    }
84
85    let t = 1.0 / (1.0 + 0.3275911 * x);
86    let poly = t
87        * (0.254829592
88            + t * (-0.284496736 + t * (1.421413741 + t * (-1.453152027 + t * 1.061405429))));
89
90    poly * (-x * x).exp()
91}
92
93/// Perform a pairwise rating update after a comparison where the winner
94/// demonstrated higher quality than the loser.
95///
96/// Returns the updated (winner_rating, loser_rating).
97///
98/// Per CP-015 section 8: follows the Thurstone-Mosteller model. The update
99/// magnitude depends on the surprise of the outcome. If the winner was already
100/// rated much higher than the loser, the update is small. If the loser was
101/// rated higher (an upset), the update is large.
102pub fn pairwise_update(winner: &Rating, loser: &Rating) -> (Rating, Rating) {
103    let sigma_w_sq = winner.sigma * winner.sigma;
104    let sigma_l_sq = loser.sigma * loser.sigma;
105
106    // c combines performance variance with rating uncertainties
107    let c = (2.0 * BETA * BETA + sigma_w_sq + sigma_l_sq).sqrt();
108
109    // t is the normalized skill difference
110    let t = (winner.mu - loser.mu) / c;
111
112    // Truncated Gaussian update functions
113    let cdf_t = normal_cdf(t);
114
115    // Guard against division by zero when cdf_t is extremely small.
116    // This happens when the winner is rated far below the loser.
117    if cdf_t < 1e-15 {
118        // The outcome is so surprising that we apply maximum update.
119        // Fall back to a large but bounded update.
120        let v = 10.0; // Capped v value
121        let w = v * (v + t.abs());
122
123        let new_winner = Rating::new(
124            winner.mu + (sigma_w_sq / c) * v,
125            winner.sigma * (1.0 - (sigma_w_sq / (c * c)) * w).max(KAPPA).sqrt(),
126        );
127        let new_loser = Rating::new(
128            loser.mu - (sigma_l_sq / c) * v,
129            loser.sigma * (1.0 - (sigma_l_sq / (c * c)) * w).max(KAPPA).sqrt(),
130        );
131        return (new_winner, new_loser);
132    }
133
134    let v = normal_pdf(t) / cdf_t;
135    let w = v * (v + t);
136
137    // Winner update: mu increases, sigma decreases
138    let new_winner_mu = winner.mu + (sigma_w_sq / c) * v;
139    let new_winner_sigma = winner.sigma * (1.0 - (sigma_w_sq / (c * c)) * w).max(KAPPA).sqrt();
140
141    // Loser update: mu decreases, sigma decreases
142    let new_loser_mu = loser.mu - (sigma_l_sq / c) * v;
143    let new_loser_sigma = loser.sigma * (1.0 - (sigma_l_sq / (c * c)) * w).max(KAPPA).sqrt();
144
145    (
146        Rating::new(new_winner_mu, new_winner_sigma),
147        Rating::new(new_loser_mu, new_loser_sigma),
148    )
149}
150
151#[cfg(test)]
152mod tests {
153    use super::*;
154
155    const EPSILON: f64 = 1e-6;
156
157    #[test]
158    fn test_default_rating() {
159        let r = Rating::default();
160        assert!((r.mu - 25.0).abs() < EPSILON);
161        assert!((r.sigma - 8.333333333333334).abs() < EPSILON);
162    }
163
164    #[test]
165    fn test_conservative_score_default() {
166        let r = Rating::default();
167        // mu - 2*sigma = 25.0 - 2*8.333 = 8.333
168        let score = r.conservative_score();
169        assert!((score - (25.0 - 2.0 * DEFAULT_SIGMA)).abs() < EPSILON);
170        assert!(score > 8.0 && score < 9.0);
171    }
172
173    #[test]
174    fn test_conservative_score_high_certainty() {
175        let r = Rating::new(30.0, 1.0);
176        // mu - 2*sigma = 30 - 2 = 28
177        assert!((r.conservative_score() - 28.0).abs() < EPSILON);
178    }
179
180    #[test]
181    fn test_conservative_score_high_uncertainty() {
182        let r = Rating::new(30.0, 10.0);
183        // mu - 2*sigma = 30 - 20 = 10
184        assert!((r.conservative_score() - 10.0).abs() < EPSILON);
185    }
186
187    #[test]
188    fn test_normal_pdf_at_zero() {
189        // pdf(0) = 1/sqrt(2*pi) ≈ 0.3989
190        let val = normal_pdf(0.0);
191        assert!((val - 0.3989422804014327).abs() < 1e-10);
192    }
193
194    #[test]
195    fn test_normal_pdf_symmetry() {
196        assert!((normal_pdf(1.0) - normal_pdf(-1.0)).abs() < 1e-10);
197        assert!((normal_pdf(2.5) - normal_pdf(-2.5)).abs() < 1e-10);
198    }
199
200    #[test]
201    fn test_normal_cdf_at_zero() {
202        // cdf(0) = 0.5
203        assert!((normal_cdf(0.0) - 0.5).abs() < 1e-6);
204    }
205
206    #[test]
207    fn test_normal_cdf_at_large_positive() {
208        // cdf(5) ≈ 1.0
209        assert!((normal_cdf(5.0) - 1.0).abs() < 1e-5);
210    }
211
212    #[test]
213    fn test_normal_cdf_at_large_negative() {
214        // cdf(-5) ≈ 0.0
215        assert!(normal_cdf(-5.0) < 1e-5);
216    }
217
218    #[test]
219    fn test_normal_cdf_known_values() {
220        // cdf(1) ≈ 0.8413
221        assert!((normal_cdf(1.0) - 0.8413).abs() < 1e-3);
222        // cdf(-1) ≈ 0.1587
223        assert!((normal_cdf(-1.0) - 0.1587).abs() < 1e-3);
224        // cdf(2) ≈ 0.9772
225        assert!((normal_cdf(2.0) - 0.9772).abs() < 1e-3);
226    }
227
228    #[test]
229    fn test_pairwise_equal_ratings() {
230        let a = Rating::default();
231        let b = Rating::default();
232
233        let (winner, loser) = pairwise_update(&a, &b);
234
235        // Winner should gain skill, loser should lose skill
236        assert!(winner.mu > a.mu);
237        assert!(loser.mu < b.mu);
238
239        // Both should have reduced uncertainty
240        assert!(winner.sigma < a.sigma);
241        assert!(loser.sigma < b.sigma);
242
243        // Updates should be symmetric for equal starting ratings
244        let gain = winner.mu - a.mu;
245        let loss = b.mu - loser.mu;
246        assert!((gain - loss).abs() < 1e-10);
247    }
248
249    #[test]
250    fn test_pairwise_expected_outcome() {
251        // When the higher-rated player wins, the update should be small
252        let strong = Rating::new(30.0, 5.0);
253        let weak = Rating::new(20.0, 5.0);
254
255        let (new_strong, new_weak) = pairwise_update(&strong, &weak);
256
257        // Strong player still wins, weak still loses
258        assert!(new_strong.mu > strong.mu);
259        assert!(new_weak.mu < weak.mu);
260
261        // But the magnitude should be small because this was expected
262        let gain = new_strong.mu - strong.mu;
263        assert!(gain < 2.0, "Expected small gain for expected outcome, got {}", gain);
264    }
265
266    #[test]
267    fn test_pairwise_upset() {
268        // When the lower-rated player wins, the update should be large
269        let strong = Rating::new(30.0, 5.0);
270        let weak = Rating::new(20.0, 5.0);
271
272        // Weak player wins (upset)
273        let (new_weak, _new_strong) = pairwise_update(&weak, &strong);
274
275        // The weak player gains more than in the expected case
276        let gain = new_weak.mu - weak.mu;
277        assert!(gain > 1.0, "Expected larger gain for upset, got {}", gain);
278    }
279
280    #[test]
281    fn test_pairwise_sigma_never_negative() {
282        // Even after many updates, sigma should remain positive
283        let mut a = Rating::default();
284        let mut b = Rating::default();
285
286        for _ in 0..100 {
287            let (new_a, new_b) = pairwise_update(&a, &b);
288            a = new_a;
289            b = new_b;
290        }
291
292        assert!(a.sigma > 0.0);
293        assert!(b.sigma > 0.0);
294    }
295
296    #[test]
297    fn test_pairwise_sigma_decreases() {
298        let a = Rating::default();
299        let b = Rating::default();
300
301        let (winner, loser) = pairwise_update(&a, &b);
302
303        // Sigma should decrease for both players after an observation
304        assert!(winner.sigma < a.sigma);
305        assert!(loser.sigma < b.sigma);
306    }
307
308    #[test]
309    fn test_convergence_consistent_winner() {
310        // A player that always wins should converge to a high rating
311        let mut winner = Rating::default();
312        let mut loser = Rating::default();
313
314        for _ in 0..50 {
315            let (new_w, new_l) = pairwise_update(&winner, &loser);
316            winner = new_w;
317            loser = new_l;
318        }
319
320        // Winner should be well above the starting mu
321        assert!(winner.mu > 35.0, "Winner mu should be high, got {}", winner.mu);
322        // Loser should be well below
323        assert!(loser.mu < 15.0, "Loser mu should be low, got {}", loser.mu);
324
325        // Both should have reduced uncertainty after 50 rounds
326        // (sigma converges slowly in the Thurstone-Mosteller model due to KAPPA floor)
327        assert!(winner.sigma < 4.0, "Winner sigma should be low, got {}", winner.sigma);
328        assert!(loser.sigma < 4.0, "Loser sigma should be low, got {}", loser.sigma);
329
330        // Conservative scores should reflect the separation
331        assert!(winner.conservative_score() > loser.conservative_score());
332    }
333
334    #[test]
335    fn test_transitivity() {
336        // If A beats B and B beats C, A's rating should be above C's
337        let mut a = Rating::default();
338        let mut b = Rating::default();
339        let mut c = Rating::default();
340
341        // A beats B five times
342        for _ in 0..5 {
343            let (new_a, new_b) = pairwise_update(&a, &b);
344            a = new_a;
345            b = new_b;
346        }
347
348        // B beats C five times
349        for _ in 0..5 {
350            let (new_b, new_c) = pairwise_update(&b, &c);
351            b = new_b;
352            c = new_c;
353        }
354
355        // A > B > C in conservative score
356        assert!(a.conservative_score() > b.conservative_score());
357        assert!(b.conservative_score() > c.conservative_score());
358        assert!(a.conservative_score() > c.conservative_score());
359    }
360
361    #[test]
362    fn test_rating_serialization() {
363        let r = Rating::new(28.5, 6.1);
364        let json = serde_json::to_string(&r).unwrap();
365        let deserialized: Rating = serde_json::from_str(&json).unwrap();
366        assert!((deserialized.mu - r.mu).abs() < EPSILON);
367        assert!((deserialized.sigma - r.sigma).abs() < EPSILON);
368    }
369
370    #[test]
371    fn test_erfc_basic_values() {
372        // erfc(0) = 1.0
373        assert!((erfc(0.0) - 1.0).abs() < 1e-6);
374        // erfc(large) ≈ 0
375        assert!(erfc(5.0) < 1e-5);
376        // erfc(-large) ≈ 2
377        assert!((erfc(-5.0) - 2.0).abs() < 1e-5);
378    }
379
380    #[test]
381    fn test_pairwise_with_different_sigmas() {
382        // A certain player vs an uncertain player
383        let certain = Rating::new(25.0, 2.0);
384        let uncertain = Rating::new(25.0, 8.0);
385
386        let (new_certain, new_uncertain) = pairwise_update(&certain, &uncertain);
387
388        // Both gain/lose, but the uncertain player's sigma should shrink more
389        assert!(new_certain.mu > certain.mu);
390        assert!(new_uncertain.mu < uncertain.mu);
391
392        // The uncertain player's rating should change more
393        let certain_change = (new_certain.mu - certain.mu).abs();
394        let uncertain_change = (new_uncertain.mu - uncertain.mu).abs();
395        assert!(
396            uncertain_change > certain_change,
397            "Uncertain player should change more: {} vs {}",
398            uncertain_change,
399            certain_change
400        );
401    }
402
403    #[test]
404    fn test_conservative_score_ordering() {
405        // A player with high mu but high sigma should rank lower than
406        // a player with moderate mu but low sigma
407        let uncertain_good = Rating::new(35.0, 10.0);
408        let certain_moderate = Rating::new(28.0, 2.0);
409
410        // 35 - 20 = 15 vs 28 - 4 = 24
411        assert!(
412            certain_moderate.conservative_score() > uncertain_good.conservative_score(),
413            "Certain moderate ({}) should outrank uncertain good ({})",
414            certain_moderate.conservative_score(),
415            uncertain_good.conservative_score()
416        );
417    }
418}