ai_agents_reasoning/
evaluation.rs1use serde::{Deserialize, Serialize};
2
3#[derive(Debug, Clone, Serialize, Deserialize)]
4pub struct EvaluationResult {
5 pub passed: bool,
6 pub confidence: f32,
7 pub criteria_results: Vec<CriterionResult>,
8}
9
10impl EvaluationResult {
11 pub fn new(passed: bool, confidence: f32) -> Self {
12 Self {
13 passed,
14 confidence,
15 criteria_results: Vec::new(),
16 }
17 }
18
19 pub fn with_criteria(mut self, criteria: Vec<CriterionResult>) -> Self {
20 self.criteria_results = criteria;
21 self
22 }
23
24 pub fn passed_all(&self) -> bool {
25 self.criteria_results.iter().all(|c| c.passed)
26 }
27
28 pub fn failed_criteria(&self) -> impl Iterator<Item = &CriterionResult> {
29 self.criteria_results.iter().filter(|c| !c.passed)
30 }
31
32 pub fn passing_criteria(&self) -> impl Iterator<Item = &CriterionResult> {
33 self.criteria_results.iter().filter(|c| c.passed)
34 }
35
36 pub fn pass_rate(&self) -> f32 {
37 if self.criteria_results.is_empty() {
38 return if self.passed { 1.0 } else { 0.0 };
39 }
40 let passed = self.criteria_results.iter().filter(|c| c.passed).count();
41 passed as f32 / self.criteria_results.len() as f32
42 }
43}
44
45impl Default for EvaluationResult {
46 fn default() -> Self {
47 Self {
48 passed: false,
49 confidence: 0.0,
50 criteria_results: Vec::new(),
51 }
52 }
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct CriterionResult {
57 pub criterion: String,
58 pub passed: bool,
59 #[serde(skip_serializing_if = "Option::is_none")]
60 pub reason: Option<String>,
61}
62
63impl CriterionResult {
64 pub fn pass(criterion: impl Into<String>) -> Self {
65 Self {
66 criterion: criterion.into(),
67 passed: true,
68 reason: None,
69 }
70 }
71
72 pub fn fail(criterion: impl Into<String>, reason: impl Into<String>) -> Self {
73 Self {
74 criterion: criterion.into(),
75 passed: false,
76 reason: Some(reason.into()),
77 }
78 }
79
80 pub fn with_reason(mut self, reason: impl Into<String>) -> Self {
81 self.reason = Some(reason.into());
82 self
83 }
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct ReflectionAttempt {
88 pub response: String,
89 pub evaluation: EvaluationResult,
90 #[serde(skip_serializing_if = "Option::is_none")]
91 pub feedback: Option<String>,
92}
93
94impl ReflectionAttempt {
95 pub fn new(response: impl Into<String>, evaluation: EvaluationResult) -> Self {
96 Self {
97 response: response.into(),
98 evaluation,
99 feedback: None,
100 }
101 }
102
103 pub fn with_feedback(mut self, feedback: impl Into<String>) -> Self {
104 self.feedback = Some(feedback.into());
105 self
106 }
107
108 pub fn passed(&self) -> bool {
109 self.evaluation.passed
110 }
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    // `new` stores the verdict and confidence and starts with no criteria.
    #[test]
    fn test_evaluation_result_creation() {
        let fresh = EvaluationResult::new(true, 0.95);

        assert!(fresh.passed);
        assert_eq!(fresh.confidence, 0.95);
        assert!(fresh.criteria_results.is_empty());
    }

    // Two passing criteria and one failing one drive the derived accessors.
    #[test]
    fn test_evaluation_result_with_criteria() {
        let mixed = EvaluationResult::new(false, 0.6).with_criteria(vec![
            CriterionResult::pass("Addresses question"),
            CriterionResult::fail("Complete response", "Response is truncated"),
            CriterionResult::pass("Helpful"),
        ]);

        assert!(!mixed.passed);
        assert!(!mixed.passed_all());
        assert_eq!(mixed.failed_criteria().count(), 1);
        assert_eq!(mixed.passing_criteria().count(), 2);
        // Two of three criteria pass, so the rate is roughly 2/3.
        assert!((mixed.pass_rate() - 0.666).abs() < 0.01);
    }

    // Without criteria the pass rate falls back to the overall verdict.
    #[test]
    fn test_evaluation_result_pass_rate() {
        let passed_without_criteria = EvaluationResult::new(true, 1.0);
        assert_eq!(passed_without_criteria.pass_rate(), 1.0);

        let failed_without_criteria = EvaluationResult::new(false, 0.0);
        assert_eq!(failed_without_criteria.pass_rate(), 0.0);
    }

    #[test]
    fn test_criterion_result_pass() {
        let passing = CriterionResult::pass("Response is clear");

        assert!(passing.passed);
        assert_eq!(passing.criterion, "Response is clear");
        assert!(passing.reason.is_none());
    }

    #[test]
    fn test_criterion_result_fail() {
        let failing = CriterionResult::fail("Accurate information", "Contains factual errors");

        assert!(!failing.passed);
        assert_eq!(failing.criterion, "Accurate information");
        assert_eq!(failing.reason.as_deref(), Some("Contains factual errors"));
    }

    // A reason can be attached to a passing result without flipping its flag.
    #[test]
    fn test_criterion_result_with_reason() {
        let annotated = CriterionResult::pass("Good response").with_reason("Excellent formatting");

        assert!(annotated.passed);
        assert_eq!(annotated.reason.as_deref(), Some("Excellent formatting"));
    }

    #[test]
    fn test_reflection_attempt_creation() {
        let attempt =
            ReflectionAttempt::new("This is my response", EvaluationResult::new(true, 0.9));

        assert_eq!(attempt.response, "This is my response");
        assert!(attempt.passed());
        assert!(attempt.feedback.is_none());
    }

    #[test]
    fn test_reflection_attempt_with_feedback() {
        let attempt = ReflectionAttempt::new("Initial response", EvaluationResult::new(false, 0.4))
            .with_feedback("Be more specific");

        assert!(!attempt.passed());
        assert_eq!(attempt.feedback.as_deref(), Some("Be more specific"));
    }

    // JSON round trip keeps the verdict, confidence and criteria count.
    #[test]
    fn test_evaluation_result_serde() {
        let original = EvaluationResult::new(false, 0.7).with_criteria(vec![
            CriterionResult::pass("Clear"),
            CriterionResult::fail("Complete", "Missing details"),
        ]);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: EvaluationResult = serde_json::from_str(&encoded).unwrap();

        assert!(!decoded.passed);
        assert_eq!(decoded.confidence, 0.7);
        assert_eq!(decoded.criteria_results.len(), 2);
    }

    // JSON round trip of an attempt whose feedback is absent.
    #[test]
    fn test_reflection_attempt_serde() {
        let original = ReflectionAttempt::new("Response text", EvaluationResult::new(true, 0.95));

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ReflectionAttempt = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.response, "Response text");
        assert!(decoded.passed());
    }
}