Skip to main content

swink_agent_eval/
score.rs

1//! Scoring primitives for evaluation results.
2
3use serde::{Deserialize, Serialize};
4
/// A numeric score in `[0.0, 1.0]` with a configurable pass threshold.
///
/// Each evaluator produces a `Score` for its metric. The threshold is
/// evaluator-specific, allowing different metrics to have different
/// pass/fail criteria.
///
/// Both fields are public. The `[0.0, 1.0]` range is applied by
/// [`Score::new`]; nothing in this definition re-checks it for values set
/// through a struct literal or direct field assignment.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct Score {
    /// The numeric score, clamped to `[0.0, 1.0]` when built via [`Score::new`].
    pub value: f64,
    /// The minimum value required to pass. Defaults to `0.5`.
    pub threshold: f64,
}
17
18impl Score {
19    /// Create a new score with the given value and threshold.
20    ///
21    /// Values are clamped to `[0.0, 1.0]`.
22    #[must_use]
23    pub const fn new(value: f64, threshold: f64) -> Self {
24        Self {
25            value: value.clamp(0.0, 1.0),
26            threshold: threshold.clamp(0.0, 1.0),
27        }
28    }
29
30    /// A perfect passing score.
31    #[must_use]
32    pub const fn pass() -> Self {
33        Self {
34            value: 1.0,
35            threshold: 0.5,
36        }
37    }
38
39    /// A zero failing score.
40    #[must_use]
41    pub const fn fail() -> Self {
42        Self {
43            value: 0.0,
44            threshold: 0.5,
45        }
46    }
47
48    /// Derive the verdict from the score and threshold.
49    #[must_use]
50    pub fn verdict(&self) -> Verdict {
51        if self.value >= self.threshold {
52            Verdict::Pass
53        } else {
54            Verdict::Fail
55        }
56    }
57}
58
59impl Default for Score {
60    fn default() -> Self {
61        Self {
62            value: 0.0,
63            threshold: 0.5,
64        }
65    }
66}
67
/// Binary pass/fail outcome derived from a [`Score`].
///
/// For serde, variant names are renamed to snake_case (`pass`, `fail`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Verdict {
    /// The score's value met or exceeded its threshold.
    Pass,
    /// The score's value did not reach its threshold.
    Fail,
}
75
76impl Verdict {
77    /// Returns `true` if the verdict is [`Verdict::Pass`].
78    #[must_use]
79    pub const fn is_pass(&self) -> bool {
80        matches!(self, Self::Pass)
81    }
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// Float comparison helper for the exact constants asserted below.
    fn approx_eq(left: f64, right: f64) -> bool {
        (left - right).abs() < f64::EPSILON
    }

    #[test]
    fn score_pass_verdict() {
        let s = Score::new(0.8, 0.5);
        assert_eq!(s.verdict(), Verdict::Pass);
    }

    #[test]
    fn score_fail_verdict() {
        let s = Score::new(0.3, 0.5);
        assert_eq!(s.verdict(), Verdict::Fail);
    }

    #[test]
    fn score_at_threshold_passes() {
        let s = Score::new(0.5, 0.5);
        assert_eq!(s.verdict(), Verdict::Pass);
    }

    #[test]
    fn score_clamps_to_bounds() {
        // Value above the range, threshold below it.
        let high_low = Score::new(1.5, -0.1);
        assert!(approx_eq(high_low.value, 1.0));
        assert!(approx_eq(high_low.threshold, 0.0));

        // Value below the range, threshold above it.
        let low_high = Score::new(-0.5, 2.0);
        assert!(approx_eq(low_high.value, 0.0));
        assert!(approx_eq(low_high.threshold, 1.0));
    }

    #[test]
    fn in_range_inputs_are_stored_unchanged() {
        let s = Score::new(0.25, 0.75);
        assert!(approx_eq(s.value, 0.25));
        assert!(approx_eq(s.threshold, 0.75));
    }

    #[test]
    fn pass_and_fail_constructors() {
        assert_eq!(Score::pass().verdict(), Verdict::Pass);
        assert_eq!(Score::fail().verdict(), Verdict::Fail);
    }

    #[test]
    fn default_is_failing_score() {
        let d = Score::default();
        assert!(approx_eq(d.value, 0.0));
        assert!(approx_eq(d.threshold, 0.5));
        assert_eq!(d.verdict(), Verdict::Fail);
    }

    #[test]
    fn verdict_is_pass() {
        assert!(Verdict::Pass.is_pass());
        assert!(!Verdict::Fail.is_pass());
    }
}