Skip to main content

synth_ai_core/data/
judgements.rs

//! Judgement and rubric assignment types.
//!
//! Types for recording evaluation results and criterion scores.
4
5use serde::{Deserialize, Serialize};
6use serde_json::Value;
7use std::collections::HashMap;
8
9/// Score data for a single criterion.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct CriterionScoreData {
12    /// The numeric score.
13    pub score: f64,
14    /// Explanation/reasoning for the score.
15    #[serde(default)]
16    pub reason: Option<String>,
17    /// Weight used in aggregation.
18    #[serde(default = "default_weight")]
19    pub weight: f64,
20    /// Normalized score (0-1 range).
21    #[serde(default)]
22    pub normalized_score: Option<f64>,
23    /// Whether this criterion passed (for required criteria).
24    #[serde(default)]
25    pub passed: Option<bool>,
26}
27
/// Weight assigned to a criterion when deserialized input does not supply one.
fn default_weight() -> f64 {
    const NEUTRAL_WEIGHT: f64 = 1.0;
    NEUTRAL_WEIGHT
}
31
32impl CriterionScoreData {
33    /// Create a new criterion score.
34    pub fn new(score: f64) -> Self {
35        Self {
36            score,
37            reason: None,
38            weight: 1.0,
39            normalized_score: None,
40            passed: None,
41        }
42    }
43
44    /// Create a score with reason.
45    pub fn with_reason(mut self, reason: impl Into<String>) -> Self {
46        self.reason = Some(reason.into());
47        self
48    }
49
50    /// Set the weight.
51    pub fn with_weight(mut self, weight: f64) -> Self {
52        self.weight = weight;
53        self
54    }
55
56    /// Mark as passed/failed.
57    pub fn with_passed(mut self, passed: bool) -> Self {
58        self.passed = Some(passed);
59        self
60    }
61
62    /// Calculate weighted score.
63    pub fn weighted_score(&self) -> f64 {
64        self.score * self.weight
65    }
66}
67
68impl Default for CriterionScoreData {
69    fn default() -> Self {
70        Self::new(0.0)
71    }
72}
73
74/// Assignment of scores to a rubric's criteria.
75#[derive(Debug, Clone, Default, Serialize, Deserialize)]
76pub struct RubricAssignment {
77    /// Map of criterion ID to score data.
78    #[serde(default)]
79    pub criterion_scores: HashMap<String, CriterionScoreData>,
80    /// Aggregated total score.
81    #[serde(default)]
82    pub total: f64,
83    /// Reference to the rubric used.
84    #[serde(default)]
85    pub rubric_ref: Option<String>,
86    /// Summary of the evaluation.
87    #[serde(default)]
88    pub summary: Option<String>,
89    /// Whether all required criteria passed.
90    #[serde(default)]
91    pub all_required_passed: Option<bool>,
92    /// Normalized total (0-1 range).
93    #[serde(default)]
94    pub normalized_total: Option<f64>,
95}
96
97impl RubricAssignment {
98    /// Create a new rubric assignment.
99    pub fn new() -> Self {
100        Self::default()
101    }
102
103    /// Add a criterion score.
104    pub fn with_score(
105        mut self,
106        criterion_id: impl Into<String>,
107        score: CriterionScoreData,
108    ) -> Self {
109        self.criterion_scores.insert(criterion_id.into(), score);
110        self
111    }
112
113    /// Set the total score.
114    pub fn with_total(mut self, total: f64) -> Self {
115        self.total = total;
116        self
117    }
118
119    /// Set the rubric reference.
120    pub fn with_rubric_ref(mut self, rubric_ref: impl Into<String>) -> Self {
121        self.rubric_ref = Some(rubric_ref.into());
122        self
123    }
124
125    /// Set the summary.
126    pub fn with_summary(mut self, summary: impl Into<String>) -> Self {
127        self.summary = Some(summary.into());
128        self
129    }
130
131    /// Calculate total from criterion scores using weighted sum.
132    pub fn calculate_weighted_total(&mut self) {
133        let total_weight: f64 = self.criterion_scores.values().map(|s| s.weight).sum();
134        if total_weight > 0.0 {
135            let weighted_sum: f64 = self
136                .criterion_scores
137                .values()
138                .map(|s| s.weighted_score())
139                .sum();
140            self.total = weighted_sum / total_weight;
141        }
142    }
143
144    /// Get score for a criterion.
145    pub fn get_score(&self, criterion_id: &str) -> Option<f64> {
146        self.criterion_scores.get(criterion_id).map(|s| s.score)
147    }
148}
149
150/// A complete judgement including rubric assignment and annotations.
151#[derive(Debug, Clone, Default, Serialize, Deserialize)]
152pub struct Judgement {
153    /// The rubric-based evaluation.
154    #[serde(default)]
155    pub rubric_assignment: Option<RubricAssignment>,
156    /// Free-form annotations.
157    #[serde(default)]
158    pub annotation: HashMap<String, Value>,
159    /// Overall pass/fail determination.
160    #[serde(default)]
161    pub passed: Option<bool>,
162    /// Confidence in the judgement (0-1).
163    #[serde(default)]
164    pub confidence: Option<f64>,
165    /// Source of the judgement (e.g., "verifier", "human", "model").
166    #[serde(default)]
167    pub source: Option<String>,
168    /// Timestamp of when judgement was made.
169    #[serde(default)]
170    pub judged_at: Option<String>,
171}
172
173impl Judgement {
174    /// Create a new judgement.
175    pub fn new() -> Self {
176        Self::default()
177    }
178
179    /// Set the rubric assignment.
180    pub fn with_rubric_assignment(mut self, assignment: RubricAssignment) -> Self {
181        self.rubric_assignment = Some(assignment);
182        self
183    }
184
185    /// Add an annotation.
186    pub fn with_annotation(mut self, key: impl Into<String>, value: Value) -> Self {
187        self.annotation.insert(key.into(), value);
188        self
189    }
190
191    /// Set passed status.
192    pub fn with_passed(mut self, passed: bool) -> Self {
193        self.passed = Some(passed);
194        self
195    }
196
197    /// Set confidence.
198    pub fn with_confidence(mut self, confidence: f64) -> Self {
199        self.confidence = Some(confidence.clamp(0.0, 1.0));
200        self
201    }
202
203    /// Set source.
204    pub fn with_source(mut self, source: impl Into<String>) -> Self {
205        self.source = Some(source.into());
206        self
207    }
208
209    /// Get the total score from the rubric assignment.
210    pub fn total_score(&self) -> Option<f64> {
211        self.rubric_assignment.as_ref().map(|a| a.total)
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::*;

    /// A builder-set weight is reflected in `weighted_score`.
    #[test]
    fn test_criterion_score() {
        let criterion = CriterionScoreData::new(8.5)
            .with_weight(2.0)
            .with_reason("Good explanation");

        assert_eq!(criterion.score, 8.5);
        assert_eq!(criterion.weighted_score(), 17.0);
    }

    /// `calculate_weighted_total` yields the weighted average of the scores.
    #[test]
    fn test_rubric_assignment() {
        let clarity = CriterionScoreData::new(9.0).with_weight(1.0);
        let accuracy = CriterionScoreData::new(7.0).with_weight(2.0);

        let mut rubric = RubricAssignment::new()
            .with_score("clarity", clarity)
            .with_score("accuracy", accuracy)
            .with_rubric_ref("eval_v1");
        rubric.calculate_weighted_total();

        // (9*1 + 7*2) / (1+2) = 23/3 ≈ 7.67
        let deviation = (rubric.total - 7.666).abs();
        assert!(deviation < 0.01);
    }

    /// Builder-set values are readable back from the judgement.
    #[test]
    fn test_judgement() {
        let rubric = RubricAssignment::new()
            .with_summary("Good overall performance")
            .with_total(8.5);

        let verdict = Judgement::new()
            .with_source("verifier")
            .with_confidence(0.95)
            .with_passed(true)
            .with_rubric_assignment(rubric);

        assert_eq!(verdict.total_score(), Some(8.5));
        assert_eq!(verdict.passed, Some(true));
        assert_eq!(verdict.confidence, Some(0.95));
    }

    /// A judgement survives a JSON round trip.
    #[test]
    fn test_serde() {
        let original = Judgement::new()
            .with_annotation("note", serde_json::json!("test"))
            .with_passed(true);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Judgement = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.passed, Some(true));

        let expected_note = serde_json::json!("test");
        assert_eq!(decoded.annotation.get("note"), Some(&expected_note));
    }
}