// ai_agents_reasoning/evaluation.rs

use serde::{Deserialize, Serialize};

3#[derive(Debug, Clone, Serialize, Deserialize)]
4pub struct EvaluationResult {
5    pub passed: bool,
6    pub confidence: f32,
7    pub criteria_results: Vec<CriterionResult>,
8}
10impl EvaluationResult {
11    pub fn new(passed: bool, confidence: f32) -> Self {
12        Self {
13            passed,
14            confidence,
15            criteria_results: Vec::new(),
16        }
17    }
18
19    pub fn with_criteria(mut self, criteria: Vec<CriterionResult>) -> Self {
20        self.criteria_results = criteria;
21        self
22    }
23
24    pub fn passed_all(&self) -> bool {
25        self.criteria_results.iter().all(|c| c.passed)
26    }
27
28    pub fn failed_criteria(&self) -> impl Iterator<Item = &CriterionResult> {
29        self.criteria_results.iter().filter(|c| !c.passed)
30    }
31
32    pub fn passing_criteria(&self) -> impl Iterator<Item = &CriterionResult> {
33        self.criteria_results.iter().filter(|c| c.passed)
34    }
35
36    pub fn pass_rate(&self) -> f32 {
37        if self.criteria_results.is_empty() {
38            return if self.passed { 1.0 } else { 0.0 };
39        }
40        let passed = self.criteria_results.iter().filter(|c| c.passed).count();
41        passed as f32 / self.criteria_results.len() as f32
42    }
43}
45impl Default for EvaluationResult {
46    fn default() -> Self {
47        Self {
48            passed: false,
49            confidence: 0.0,
50            criteria_results: Vec::new(),
51        }
52    }
53}
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct CriterionResult {
57    pub criterion: String,
58    pub passed: bool,
59    #[serde(skip_serializing_if = "Option::is_none")]
60    pub reason: Option<String>,
61}
63impl CriterionResult {
64    pub fn pass(criterion: impl Into<String>) -> Self {
65        Self {
66            criterion: criterion.into(),
67            passed: true,
68            reason: None,
69        }
70    }
71
72    pub fn fail(criterion: impl Into<String>, reason: impl Into<String>) -> Self {
73        Self {
74            criterion: criterion.into(),
75            passed: false,
76            reason: Some(reason.into()),
77        }
78    }
79
80    pub fn with_reason(mut self, reason: impl Into<String>) -> Self {
81        self.reason = Some(reason.into());
82        self
83    }
84}
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct ReflectionAttempt {
88    pub response: String,
89    pub evaluation: EvaluationResult,
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub feedback: Option<String>,
92}
94impl ReflectionAttempt {
95    pub fn new(response: impl Into<String>, evaluation: EvaluationResult) -> Self {
96        Self {
97            response: response.into(),
98            evaluation,
99            feedback: None,
100        }
101    }
102
103    pub fn with_feedback(mut self, feedback: impl Into<String>) -> Self {
104        self.feedback = Some(feedback.into());
105        self
106    }
107
108    pub fn passed(&self) -> bool {
109        self.evaluation.passed
110    }
111}
/// Unit tests for the evaluation types: constructors, builder methods,
/// criterion queries, and serde round-trips.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_evaluation_result_creation() {
        let result = EvaluationResult::new(true, 0.95);
        assert!(result.passed);
        assert_eq!(result.confidence, 0.95);
        assert!(result.criteria_results.is_empty());
    }

    #[test]
    fn test_evaluation_result_default() {
        let result = EvaluationResult::default();
        assert!(!result.passed);
        assert_eq!(result.confidence, 0.0);
        assert!(result.criteria_results.is_empty());
        // With no criteria, `passed_all` is vacuously true while `pass_rate`
        // follows the overall verdict.
        assert!(result.passed_all());
        assert_eq!(result.pass_rate(), 0.0);
    }

    #[test]
    fn test_evaluation_result_with_criteria() {
        let criteria = vec![
            CriterionResult::pass("Addresses question"),
            CriterionResult::fail("Complete response", "Response is truncated"),
            CriterionResult::pass("Helpful"),
        ];

        let result = EvaluationResult::new(false, 0.6).with_criteria(criteria);

        assert!(!result.passed);
        assert!(!result.passed_all());
        assert_eq!(result.failed_criteria().count(), 1);
        assert_eq!(result.passing_criteria().count(), 2);
        // Two of three criteria pass.
        assert!((result.pass_rate() - 0.666).abs() < 0.01);
    }

    #[test]
    fn test_evaluation_result_pass_rate() {
        // Without criteria the rate mirrors the overall verdict.
        let empty = EvaluationResult::new(true, 1.0);
        assert_eq!(empty.pass_rate(), 1.0);

        let empty_failed = EvaluationResult::new(false, 0.0);
        assert_eq!(empty_failed.pass_rate(), 0.0);
    }

    #[test]
    fn test_criterion_result_pass() {
        let criterion = CriterionResult::pass("Response is clear");
        assert!(criterion.passed);
        assert_eq!(criterion.criterion, "Response is clear");
        assert!(criterion.reason.is_none());
    }

    #[test]
    fn test_criterion_result_fail() {
        let criterion = CriterionResult::fail("Accurate information", "Contains factual errors");
        assert!(!criterion.passed);
        assert_eq!(criterion.criterion, "Accurate information");
        assert_eq!(
            criterion.reason,
            Some("Contains factual errors".to_string())
        );
    }

    #[test]
    fn test_criterion_result_with_reason() {
        // A reason can be attached to a passing criterion as well.
        let criterion = CriterionResult::pass("Good response").with_reason("Excellent formatting");
        assert!(criterion.passed);
        assert_eq!(criterion.reason, Some("Excellent formatting".to_string()));
    }

    #[test]
    fn test_criterion_result_skips_missing_reason() {
        // `reason` carries `skip_serializing_if = "Option::is_none"`.
        let json = serde_json::to_string(&CriterionResult::pass("Clear")).unwrap();
        assert!(!json.contains("reason"));
    }

    #[test]
    fn test_reflection_attempt_creation() {
        let evaluation = EvaluationResult::new(true, 0.9);
        let attempt = ReflectionAttempt::new("This is my response", evaluation);

        assert_eq!(attempt.response, "This is my response");
        assert!(attempt.passed());
        assert!(attempt.feedback.is_none());
    }

    #[test]
    fn test_reflection_attempt_with_feedback() {
        let evaluation = EvaluationResult::new(false, 0.4);
        let attempt = ReflectionAttempt::new("Initial response", evaluation)
            .with_feedback("Be more specific");

        assert!(!attempt.passed());
        assert_eq!(attempt.feedback, Some("Be more specific".to_string()));
    }

    #[test]
    fn test_evaluation_result_serde() {
        let criteria = vec![
            CriterionResult::pass("Clear"),
            CriterionResult::fail("Complete", "Missing details"),
        ];
        let result = EvaluationResult::new(false, 0.7).with_criteria(criteria);

        // Round-trip through JSON and check the fields survive.
        let json = serde_json::to_string(&result).unwrap();
        let parsed: EvaluationResult = serde_json::from_str(&json).unwrap();

        assert!(!parsed.passed);
        assert_eq!(parsed.confidence, 0.7);
        assert_eq!(parsed.criteria_results.len(), 2);
    }

    #[test]
    fn test_reflection_attempt_serde() {
        let evaluation = EvaluationResult::new(true, 0.95);
        let attempt = ReflectionAttempt::new("Response text", evaluation);

        let json = serde_json::to_string(&attempt).unwrap();
        let parsed: ReflectionAttempt = serde_json::from_str(&json).unwrap();

        assert_eq!(parsed.response, "Response text");
        assert!(parsed.passed());
    }
}