Skip to main content

cp_validator/
rating.rs

1//! Bayesian rating implementation using skillratings' Weng-Lin model.
2//!
3//! Wraps the `skillratings` crate's Weng-Lin implementation for pairwise
4//! skill estimation. Each player (contributor or peer) has a mean skill
5//! estimate (mu) and an uncertainty (sigma). Ratings update through pairwise
6//! comparisons.
7//!
8//! Per CP-015 section 8: the core algorithm follows Weng and Lin (2011),
9//! adapted from the `TrueSkill` framework.
10
11use serde::{Deserialize, Serialize};
12use skillratings::weng_lin::{weng_lin, WengLinConfig, WengLinRating};
13use skillratings::Outcomes;
14
15/// Default mean skill for a new player.
16pub const DEFAULT_MU: f64 = 25.0;
17
18/// Default uncertainty for a new player (mu / 3).
19pub const DEFAULT_SIGMA: f64 = 25.0 / 3.0; // 8.333...
20
21const WENG_LIN_CONFIG: WengLinConfig = WengLinConfig {
22    beta: 25.0 / 6.0,
23    uncertainty_tolerance: 0.0001,
24};
25
26/// A Bayesian rating with mean skill estimate and uncertainty.
27#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
28pub struct Rating {
29    /// Mean skill estimate.
30    pub mu: f64,
31    /// Uncertainty (standard deviation of the skill belief).
32    pub sigma: f64,
33}
34
35impl Rating {
36    /// Create a new rating with the given mu and sigma.
37    pub fn new(mu: f64, sigma: f64) -> Self {
38        Self { mu, sigma }
39    }
40
41    /// Conservative skill estimate: mu - 2*sigma.
42    ///
43    /// This is the lower bound of a ~95% confidence interval for the
44    /// player's true skill. It penalizes uncertainty, meaning players
45    /// with few observations are ranked lower than confidently-good players.
46    pub fn conservative_score(&self) -> f64 {
47        self.mu - 2.0 * self.sigma
48    }
49}
50
51impl Default for Rating {
52    fn default() -> Self {
53        Self {
54            mu: DEFAULT_MU,
55            sigma: DEFAULT_SIGMA,
56        }
57    }
58}
59
60/// Perform a pairwise rating update after a comparison where the winner
61/// demonstrated higher quality than the loser.
62///
63/// Returns the updated (`winner_rating`, `loser_rating`).
64///
65/// Per CP-015 section 8: follows the Weng-Lin model. The update
66/// magnitude depends on the surprise of the outcome. If the winner was already
67/// rated much higher than the loser, the update is small. If the loser was
68/// rated higher (an upset), the update is large.
69pub fn pairwise_update(winner: &Rating, loser: &Rating) -> (Rating, Rating) {
70    let w = WengLinRating {
71        rating: winner.mu,
72        uncertainty: winner.sigma,
73    };
74    let l = WengLinRating {
75        rating: loser.mu,
76        uncertainty: loser.sigma,
77    };
78
79    let (new_w, new_l) = weng_lin(&w, &l, &Outcomes::WIN, &WENG_LIN_CONFIG);
80
81    (
82        Rating::new(new_w.rating, new_w.uncertainty),
83        Rating::new(new_l.rating, new_l.uncertainty),
84    )
85}
86
87#[cfg(test)]
88mod tests {
89    use super::*;
90
91    const EPSILON: f64 = 1e-6;
92
93    #[test]
94    fn test_default_rating() {
95        let r = Rating::default();
96        assert!((r.mu - 25.0).abs() < EPSILON);
97        assert!((r.sigma - 8.333333333333334).abs() < EPSILON);
98    }
99
100    #[test]
101    fn test_conservative_score_default() {
102        let r = Rating::default();
103        // mu - 2*sigma = 25.0 - 2*8.333 = 8.333
104        let score = r.conservative_score();
105        assert!((score - (25.0 - 2.0 * DEFAULT_SIGMA)).abs() < EPSILON);
106        assert!(score > 8.0 && score < 9.0);
107    }
108
109    #[test]
110    fn test_conservative_score_high_certainty() {
111        let r = Rating::new(30.0, 1.0);
112        // mu - 2*sigma = 30 - 2 = 28
113        assert!((r.conservative_score() - 28.0).abs() < EPSILON);
114    }
115
116    #[test]
117    fn test_conservative_score_high_uncertainty() {
118        let r = Rating::new(30.0, 10.0);
119        // mu - 2*sigma = 30 - 20 = 10
120        assert!((r.conservative_score() - 10.0).abs() < EPSILON);
121    }
122
123    #[test]
124    fn test_pairwise_equal_ratings() {
125        let a = Rating::default();
126        let b = Rating::default();
127
128        let (winner, loser) = pairwise_update(&a, &b);
129
130        // Winner should gain skill, loser should lose skill
131        assert!(winner.mu > a.mu);
132        assert!(loser.mu < b.mu);
133
134        // Both should have reduced uncertainty
135        assert!(winner.sigma < a.sigma);
136        assert!(loser.sigma < b.sigma);
137
138        // Updates should be symmetric for equal starting ratings
139        let gain = winner.mu - a.mu;
140        let loss = b.mu - loser.mu;
141        assert!((gain - loss).abs() < 1e-10);
142    }
143
144    #[test]
145    fn test_pairwise_expected_outcome() {
146        // When the higher-rated player wins, the update should be small
147        let strong = Rating::new(30.0, 5.0);
148        let weak = Rating::new(20.0, 5.0);
149
150        let (new_strong, new_weak) = pairwise_update(&strong, &weak);
151
152        // Strong player still wins, weak still loses
153        assert!(new_strong.mu > strong.mu);
154        assert!(new_weak.mu < weak.mu);
155
156        // But the magnitude should be small because this was expected
157        let gain = new_strong.mu - strong.mu;
158        assert!(
159            gain < 2.0,
160            "Expected small gain for expected outcome, got {gain}"
161        );
162    }
163
164    #[test]
165    fn test_pairwise_upset() {
166        // When the lower-rated player wins, the update should be large
167        let strong = Rating::new(30.0, 5.0);
168        let weak = Rating::new(20.0, 5.0);
169
170        // Weak player wins (upset)
171        let (new_weak, _new_strong) = pairwise_update(&weak, &strong);
172
173        // The weak player gains more than in the expected case
174        let gain = new_weak.mu - weak.mu;
175        assert!(gain > 1.0, "Expected larger gain for upset, got {gain}");
176    }
177
178    #[test]
179    fn test_pairwise_sigma_never_negative() {
180        // Even after many updates, sigma should remain positive
181        let mut a = Rating::default();
182        let mut b = Rating::default();
183
184        for _ in 0..100 {
185            let (new_a, new_b) = pairwise_update(&a, &b);
186            a = new_a;
187            b = new_b;
188        }
189
190        assert!(a.sigma > 0.0);
191        assert!(b.sigma > 0.0);
192    }
193
194    #[test]
195    fn test_pairwise_sigma_decreases() {
196        let a = Rating::default();
197        let b = Rating::default();
198
199        let (winner, loser) = pairwise_update(&a, &b);
200
201        // Sigma should decrease for both players after an observation
202        assert!(winner.sigma < a.sigma);
203        assert!(loser.sigma < b.sigma);
204    }
205
206    #[test]
207    fn test_convergence_consistent_winner() {
208        // A player that always wins should converge to a high rating
209        let mut winner = Rating::default();
210        let mut loser = Rating::default();
211
212        for _ in 0..50 {
213            let (new_w, new_l) = pairwise_update(&winner, &loser);
214            winner = new_w;
215            loser = new_l;
216        }
217
218        // Winner should be well above the starting mu
219        assert!(
220            winner.mu > 30.0,
221            "Winner mu should be high, got {}",
222            winner.mu
223        );
224        // Loser should be well below
225        assert!(loser.mu < 20.0, "Loser mu should be low, got {}", loser.mu);
226
227        // Conservative scores should reflect the separation
228        assert!(winner.conservative_score() > loser.conservative_score());
229    }
230
231    #[test]
232    fn test_transitivity() {
233        // If A beats B and B beats C, A's rating should be above C's
234        let mut a = Rating::default();
235        let mut b = Rating::default();
236        let mut c = Rating::default();
237
238        // A beats B five times
239        for _ in 0..5 {
240            let (new_a, new_b) = pairwise_update(&a, &b);
241            a = new_a;
242            b = new_b;
243        }
244
245        // B beats C five times
246        for _ in 0..5 {
247            let (new_b, new_c) = pairwise_update(&b, &c);
248            b = new_b;
249            c = new_c;
250        }
251
252        // A > B > C in conservative score
253        assert!(a.conservative_score() > b.conservative_score());
254        assert!(b.conservative_score() > c.conservative_score());
255        assert!(a.conservative_score() > c.conservative_score());
256    }
257
258    #[test]
259    fn test_rating_serialization() {
260        let r = Rating::new(28.5, 6.1);
261        let json = serde_json::to_string(&r).unwrap();
262        let deserialized: Rating = serde_json::from_str(&json).unwrap();
263        assert!((deserialized.mu - r.mu).abs() < EPSILON);
264        assert!((deserialized.sigma - r.sigma).abs() < EPSILON);
265    }
266
267    #[test]
268    fn test_pairwise_with_different_sigmas() {
269        // A certain player vs an uncertain player
270        let certain = Rating::new(25.0, 2.0);
271        let uncertain = Rating::new(25.0, 8.0);
272
273        let (new_certain, new_uncertain) = pairwise_update(&certain, &uncertain);
274
275        // Both gain/lose, but the uncertain player's sigma should shrink more
276        assert!(new_certain.mu > certain.mu);
277        assert!(new_uncertain.mu < uncertain.mu);
278
279        // The uncertain player's rating should change more
280        let certain_change = (new_certain.mu - certain.mu).abs();
281        let uncertain_change = (new_uncertain.mu - uncertain.mu).abs();
282        assert!(
283            uncertain_change > certain_change,
284            "Uncertain player should change more: {uncertain_change} vs {certain_change}"
285        );
286    }
287
288    #[test]
289    fn test_conservative_score_ordering() {
290        // A player with high mu but high sigma should rank lower than
291        // a player with moderate mu but low sigma
292        let uncertain_good = Rating::new(35.0, 10.0);
293        let certain_moderate = Rating::new(28.0, 2.0);
294
295        // 35 - 20 = 15 vs 28 - 4 = 24
296        assert!(
297            certain_moderate.conservative_score() > uncertain_good.conservative_score(),
298            "Certain moderate ({}) should outrank uncertain good ({})",
299            certain_moderate.conservative_score(),
300            uncertain_good.conservative_score()
301        );
302    }
303}