1use crate::types::message::{Content, Message, Role};
19
/// A single acceptance criterion the evaluator checks the agent's output against.
#[derive(Debug, Clone)]
pub struct Criterion {
    /// Statement of what the output must satisfy.
    pub text: String,
    /// `true` when the criterion is rendered as `[required]` in the evaluation prompt,
    /// `false` when it is rendered as `[optional]`.
    pub required: bool,
    /// Relative weight; both constructors start it at `1.0`.
    pub weight: f32,
}

impl Criterion {
    /// Creates a required criterion with the default weight of `1.0`.
    pub fn required(text: impl Into<String>) -> Self {
        let text = text.into();
        Self {
            text,
            required: true,
            weight: 1.0,
        }
    }

    /// Creates an optional criterion with the default weight of `1.0`.
    pub fn optional(text: impl Into<String>) -> Self {
        // Identical to a required criterion except for the flag.
        Self {
            required: false,
            ..Self::required(text)
        }
    }

    /// Consumes the criterion and returns it with its weight set to `w`.
    pub fn with_weight(self, w: f32) -> Self {
        Self { weight: w, ..self }
    }
}

/// An owned string converts into a *required* criterion.
impl From<String> for Criterion {
    fn from(s: String) -> Self {
        Criterion::required(s)
    }
}

/// A string slice converts into a *required* criterion.
impl From<&str> for Criterion {
    fn from(s: &str) -> Self {
        Criterion::from(s.to_owned())
    }
}
68
/// Evaluator verdict for one criterion, parsed from an entry of the verdict's `details` array.
#[derive(Debug, Clone)]
pub struct CriterionResult {
    /// Criterion text as reported back by the evaluator.
    pub criterion: String,
    /// Whether the evaluator marked this criterion as passed.
    pub passed: bool,
    /// Per-criterion score; the evaluation prompt asks for a value in 0.0-1.0.
    pub score: f32,
    /// Evaluator's feedback for this criterion (may be empty).
    pub feedback: String,
}
82
/// Reusable skill proposed by the evaluator in the verdict's `skill` field.
#[derive(Debug, Clone)]
pub struct SkillCandidate {
    /// Skill identifier; the prompt asks the evaluator for snake_case.
    pub name: String,
    /// Short description; the prompt asks for one sentence.
    pub description: String,
    /// Optional hint about when the skill applies.
    pub when_to_use: Option<String>,
    /// Skill body; the prompt asks for markdown without frontmatter.
    pub content: String,
}
92
/// Parsed evaluator verdict for one attempt.
#[derive(Debug, Clone)]
pub struct EvalResult {
    /// Overall pass/fail flag reported by the evaluator.
    pub passed: bool,
    /// Overall score; the evaluation prompt asks for a value in 0.0-1.0.
    pub overall_score: f32,
    /// Evaluator's summary feedback.
    pub feedback: String,
    /// Per-criterion results, in the order the evaluator listed them.
    pub details: Vec<CriterionResult>,
    /// Skill proposed by the evaluator, if the verdict carried a usable one.
    pub skill_candidate: Option<SkillCandidate>,
}
105
106pub fn build_eval_messages(
114 goal: &str,
115 criteria: &[Criterion],
116 result: &str,
117 attempt: u32,
118 extract_skill_on_pass: bool,
119) -> Vec<Message> {
120 let criteria_text = if criteria.is_empty() {
121 "No explicit criteria — use general quality judgement.".to_string()
122 } else {
123 criteria
124 .iter()
125 .enumerate()
126 .map(|(i, c)| {
127 let tag = if c.required {
128 "[required]"
129 } else {
130 "[optional]"
131 };
132 let weight = if (c.weight - 1.0).abs() > 0.01 {
133 format!(" weight={:.1}", c.weight)
134 } else {
135 String::new()
136 };
137 format!("{}. {}{}{}", i + 1, tag, weight, c.text)
138 })
139 .collect::<Vec<_>>()
140 .join("\n")
141 };
142
143 let details_schema = r#"[{"criterion":"...","passed":bool,"score":0.0-1.0,"feedback":"..."}]"#;
144
145 let skill_instruction = if extract_skill_on_pass {
146 "\nIf passed=true and the approach is reusable, add a \"skill\" field:\
147\n{\"name\":\"snake_case\",\"description\":\"one sentence\",\"when_to_use\":\"optional hint\",\"content\":\"markdown body (no frontmatter)\"}"
148 } else {
149 ""
150 };
151
152 let system = Message {
153 role: Role::System,
154 content: Content::Text(format!(
155 "You are an impartial evaluator. Assess whether the agent's output meets the goal and criteria.\n\
156 [required] criteria must ALL pass for overall passed=true.\n\
157 [optional] criteria contribute to overall_score but do not block passing.\n\
158 Respond with JSON only:\n\
159 {{\"passed\":bool,\"overall_score\":0.0-1.0,\"feedback\":\"concise summary\",\
160 \"details\":{details_schema}{skill_instruction}}}"
161 )),
162 tool_calls: vec![],
163 token_count: None,
164 };
165
166 let user = Message {
167 role: Role::User,
168 content: Content::Text(format!(
169 "## Goal\n{goal}\n\n## Criteria\n{criteria_text}\n\n## Agent Output (attempt {attempt})\n{result}"
170 )),
171 tool_calls: vec![],
172 token_count: None,
173 };
174
175 vec![system, user]
176}
177
178pub fn verdict_output_schema(extract_skill_on_pass: bool) -> serde_json::Value {
186 let mut properties = serde_json::json!({
187 "passed": { "type": "boolean", "description": "true iff all [required] criteria pass" },
188 "overall_score": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
189 "feedback": { "type": "string", "description": "concise summary; on fail, what to fix next attempt" },
190 "details": {
191 "type": "array",
192 "items": {
193 "type": "object",
194 "required": ["criterion", "passed", "score", "feedback"],
195 "properties": {
196 "criterion": { "type": "string" },
197 "passed": { "type": "boolean" },
198 "score": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
199 "feedback": { "type": "string" }
200 }
201 }
202 }
203 });
204 if extract_skill_on_pass {
205 properties["skill"] = serde_json::json!({
206 "type": "object",
207 "description": "optional reusable skill distilled from a passing run",
208 "required": ["name", "description", "content"],
209 "properties": {
210 "name": { "type": "string", "description": "snake_case" },
211 "description": { "type": "string" },
212 "when_to_use": { "type": "string" },
213 "content": { "type": "string", "description": "markdown body, no frontmatter" }
214 }
215 });
216 }
217 serde_json::json!({
218 "type": "object",
219 "required": ["passed", "overall_score", "feedback"],
220 "properties": properties
221 })
222}
223
224pub fn parse_verdict(content: &str) -> EvalResult {
231 let json_str = extract_json(content);
232 let v: serde_json::Value = serde_json::from_str(json_str).unwrap_or(serde_json::Value::Null);
233
234 let passed = v.get("passed").and_then(|x| x.as_bool()).unwrap_or(false);
235 let overall_score = v
236 .get("overall_score")
237 .and_then(|x| x.as_f64())
238 .map(|f| f as f32)
239 .unwrap_or(if passed { 1.0 } else { 0.0 });
240 let feedback = v
241 .get("feedback")
242 .and_then(|x| x.as_str())
243 .unwrap_or("No feedback provided.")
244 .to_string();
245
246 let details = v
247 .get("details")
248 .and_then(|d| d.as_array())
249 .map(|arr| {
250 arr.iter()
251 .filter_map(|item| {
252 let criterion = item.get("criterion")?.as_str()?.to_string();
253 let item_passed = item
254 .get("passed")
255 .and_then(|x| x.as_bool())
256 .unwrap_or(false);
257 let score = item
258 .get("score")
259 .and_then(|x| x.as_f64())
260 .map(|f| f as f32)
261 .unwrap_or(if item_passed { 1.0 } else { 0.0 });
262 let item_feedback = item
263 .get("feedback")
264 .and_then(|x| x.as_str())
265 .unwrap_or("")
266 .to_string();
267 Some(CriterionResult {
268 criterion,
269 passed: item_passed,
270 score,
271 feedback: item_feedback,
272 })
273 })
274 .collect()
275 })
276 .unwrap_or_default();
277
278 let skill_candidate = v.get("skill").and_then(|s| {
279 let name = s.get("name")?.as_str()?.to_string();
280 let description = s.get("description")?.as_str()?.to_string();
281 let content = s.get("content")?.as_str()?.to_string();
282 if name.is_empty() {
283 return None;
284 }
285 let when_to_use = s
286 .get("when_to_use")
287 .and_then(|x| x.as_str())
288 .filter(|x| !x.is_empty())
289 .map(|x| x.to_string());
290 Some(SkillCandidate {
291 name,
292 description,
293 when_to_use,
294 content,
295 })
296 });
297
298 EvalResult {
299 passed,
300 overall_score,
301 feedback,
302 details,
303 skill_candidate,
304 }
305}
306
/// Returns the span of `s` from its first `{` to its last `}` (inclusive).
///
/// This strips surrounding prose or markdown fences from a reply that embeds a JSON
/// object. When `s` has no `{`, no `}`, or its last `}` comes before its first `{`, the
/// input is returned unchanged.
fn extract_json(s: &str) -> &str {
    match (s.find('{'), s.rfind('}')) {
        // Requiring `start < end` avoids slicing with a reversed range, which would
        // panic on input such as "} {".
        (Some(start), Some(end)) if start < end => &s[start..=end],
        _ => s,
    }
}
316
// Unit tests for prompt construction, verdict parsing and the output schema.
#[cfg(test)]
mod tests {
    use super::*;

    // The user message carries goal, criteria and attempt number; with skill extraction
    // enabled the system message mentions the "skill" field.
    #[test]
    fn build_eval_messages_carries_goal_and_criteria() {
        let msgs = build_eval_messages(
            "Write a function",
            &[Criterion::required("Must handle errors")],
            "fn foo() {}",
            1,
            true,
        );
        assert_eq!(msgs.len(), 2);
        assert!(matches!(msgs[0].role, Role::System));
        let Content::Text(user) = &msgs[1].content else {
            panic!("expected text")
        };
        assert!(user.contains("Write a function"));
        assert!(user.contains("[required]Must handle errors"));
        assert!(user.contains("attempt 1"));
        let Content::Text(system) = &msgs[0].content else {
            panic!("expected text")
        };
        assert!(system.contains("\"skill\""));
    }

    // With skill extraction disabled the skill example object is absent from the prompt.
    #[test]
    fn build_eval_messages_omits_skill_instruction_when_disabled() {
        let msgs = build_eval_messages("g", &[], "r", 1, false);
        let Content::Text(system) = &msgs[0].content else {
            panic!("expected text")
        };
        assert!(!system.contains("\"name\":\"snake_case\""));
    }

    // A failing verdict without a "skill" field yields no skill candidate.
    #[test]
    fn parse_verdict_failed_no_skill() {
        let result = parse_verdict(
            r#"{"passed":false,"overall_score":0.2,"feedback":"Missing error handling","details":[{"criterion":"Must handle errors","passed":false,"score":0.2,"feedback":"No error handling found"}]}"#,
        );
        assert!(!result.passed);
        assert_eq!(result.feedback, "Missing error handling");
        assert_eq!(result.details.len(), 1);
        assert!(!result.details[0].passed);
        assert!(result.skill_candidate.is_none());
    }

    // A passing verdict with details and a complete "skill" object is parsed in full.
    #[test]
    fn parse_verdict_passed_with_skill_and_details() {
        let json = r#"{"passed":true,"overall_score":0.95,"feedback":"All criteria met","details":[{"criterion":"Must handle errors","passed":true,"score":1.0,"feedback":"Good error handling"}],"skill":{"name":"robust_api_call","description":"How to call APIs with retries","content":"Robust API Call - Always retry on 5xx."}}"#;
        let result = parse_verdict(json);
        assert!(result.passed);
        assert!(result.overall_score > 0.9);
        assert_eq!(result.details.len(), 1);
        assert!(result.details[0].passed);
        let skill = result.skill_candidate.unwrap();
        assert_eq!(skill.name, "robust_api_call");
        assert!(skill.content.contains("retry"));
    }

    // JSON wrapped in a markdown code fence is still parsed.
    #[test]
    fn parse_verdict_strips_markdown_fences() {
        let result = parse_verdict("```json\n{\"passed\":true,\"feedback\":\"good\"}\n```");
        assert!(result.passed);
    }

    // Converting from a string slice produces a required criterion with weight 1.0.
    #[test]
    fn criterion_from_string_is_required() {
        let c = Criterion::from("some check");
        assert!(c.required);
        assert!((c.weight - 1.0).abs() < 0.001);
    }

    // `with_weight` overrides the default weight and keeps the optional flag.
    #[test]
    fn optional_criterion_with_weight() {
        let c = Criterion::optional("bonus check").with_weight(0.5);
        assert!(!c.required);
        assert!((c.weight - 0.5).abs() < 0.001);
    }

    // The schema exposes the core properties, and "skill" only when requested.
    #[test]
    fn verdict_output_schema_shape() {
        let schema = verdict_output_schema(true);
        assert_eq!(schema["type"], "object");
        assert!(schema["properties"]["passed"].is_object());
        assert!(schema["properties"]["overall_score"].is_object());
        assert!(schema["properties"]["details"].is_object());
        assert!(schema["properties"]["skill"].is_object());
        let no_skill = verdict_output_schema(false);
        assert!(no_skill["properties"]["skill"].is_null());
    }
}