use crate::types::message::{Content, Message, Role};
/// A single check the evaluator is asked to apply to an agent's output.
#[derive(Debug, Clone)]
pub struct Criterion {
    /// Human-readable statement of what is being checked.
    pub text: String,
    /// `true` for blocking criteria, `false` for optional ones.
    pub required: bool,
    /// Relative weight; every constructor in this file starts it at `1.0`.
    pub weight: f32,
}

impl Criterion {
    /// Creates a required criterion with weight `1.0`.
    pub fn required(text: impl Into<String>) -> Self {
        Criterion {
            weight: 1.0,
            required: true,
            text: text.into(),
        }
    }

    /// Creates an optional criterion with weight `1.0`.
    pub fn optional(text: impl Into<String>) -> Self {
        // Identical to a required criterion except for the flag.
        Criterion {
            required: false,
            ..Criterion::required(text)
        }
    }

    /// Returns the same criterion with its weight replaced by `w`.
    pub fn with_weight(self, w: f32) -> Self {
        Criterion { weight: w, ..self }
    }
}

/// An owned string converts into a required criterion with weight `1.0`.
impl From<String> for Criterion {
    fn from(s: String) -> Self {
        Criterion {
            text: s,
            required: true,
            weight: 1.0,
        }
    }
}

/// A string slice converts into a required criterion with weight `1.0`.
impl From<&str> for Criterion {
    fn from(s: &str) -> Self {
        Self::from(String::from(s))
    }
}
/// Outcome reported for one criterion, as read from the `details` array of a verdict.
#[derive(Debug, Clone)]
pub struct CriterionResult {
/// The criterion text as echoed back in the verdict's `criterion` field.
pub criterion: String,
/// Whether this criterion passed; `parse_verdict` uses `false` when the field is absent.
pub passed: bool,
/// Score for this criterion; `parse_verdict` falls back to `1.0`/`0.0` based on `passed`.
pub score: f32,
/// Free-form explanation; `parse_verdict` uses an empty string when the field is absent.
pub feedback: String,
}
/// A reusable skill read from the optional `skill` object of a verdict.
#[derive(Debug, Clone)]
pub struct SkillCandidate {
/// Skill identifier; the prompt and schema in this file ask for `snake_case`.
pub name: String,
/// Short description of the skill (the prompt asks for one sentence).
pub description: String,
/// Optional usage hint; `parse_verdict` stores `None` when it is missing or empty.
pub when_to_use: Option<String>,
/// Skill body; the prompt asks for markdown without frontmatter.
pub content: String,
}
/// Parsed evaluator verdict, as produced by `parse_verdict`.
#[derive(Debug, Clone)]
pub struct EvalResult {
/// Overall pass/fail flag from the verdict's `passed` field.
pub passed: bool,
/// Overall score from the verdict's `overall_score` field (the schema declares 0.0..=1.0).
pub overall_score: f32,
/// Summary feedback from the verdict's `feedback` field.
pub feedback: String,
/// Per-criterion results from the verdict's `details` array; empty when absent.
pub details: Vec<CriterionResult>,
/// Skill read from the verdict's `skill` object, if one was present and well-formed.
pub skill_candidate: Option<SkillCandidate>,
}
pub fn build_eval_messages(
goal: &str,
criteria: &[Criterion],
result: &str,
attempt: u32,
extract_skill_on_pass: bool,
) -> Vec<Message> {
let criteria_text = if criteria.is_empty() {
"No explicit criteria — use general quality judgement.".to_string()
} else {
criteria
.iter()
.enumerate()
.map(|(i, c)| {
let tag = if c.required {
"[required]"
} else {
"[optional]"
};
let weight = if (c.weight - 1.0).abs() > 0.01 {
format!(" weight={:.1}", c.weight)
} else {
String::new()
};
format!("{}. {}{}{}", i + 1, tag, weight, c.text)
})
.collect::<Vec<_>>()
.join("\n")
};
let details_schema = r#"[{"criterion":"...","passed":bool,"score":0.0-1.0,"feedback":"..."}]"#;
let skill_instruction = if extract_skill_on_pass {
"\nIf passed=true and the approach is reusable, add a \"skill\" field:\
\n{\"name\":\"snake_case\",\"description\":\"one sentence\",\"when_to_use\":\"optional hint\",\"content\":\"markdown body (no frontmatter)\"}"
} else {
""
};
let system = Message {
role: Role::System,
content: Content::Text(format!(
"You are an impartial evaluator. Assess whether the agent's output meets the goal and criteria.\n\
[required] criteria must ALL pass for overall passed=true.\n\
[optional] criteria contribute to overall_score but do not block passing.\n\
Respond with JSON only:\n\
{{\"passed\":bool,\"overall_score\":0.0-1.0,\"feedback\":\"concise summary\",\
\"details\":{details_schema}{skill_instruction}}}"
)),
tool_calls: vec![],
token_count: None,
};
let user = Message {
role: Role::User,
content: Content::Text(format!(
"## Goal\n{goal}\n\n## Criteria\n{criteria_text}\n\n## Agent Output (attempt {attempt})\n{result}"
)),
tool_calls: vec![],
token_count: None,
};
vec![system, user]
}
/// Returns the JSON Schema describing the evaluator's verdict object.
///
/// The top-level object requires `passed`, `overall_score` and `feedback`, and also
/// describes an optional `details` array of per-criterion results. When
/// `extract_skill_on_pass` is `true`, an optional `skill` object property is added.
pub fn verdict_output_schema(extract_skill_on_pass: bool) -> serde_json::Value {
    // Shape of one entry of the `details` array.
    let criterion_item = serde_json::json!({
        "type": "object",
        "required": ["criterion", "passed", "score", "feedback"],
        "properties": {
            "criterion": { "type": "string" },
            "passed": { "type": "boolean" },
            "score": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
            "feedback": { "type": "string" }
        }
    });

    let mut schema = serde_json::json!({
        "type": "object",
        "required": ["passed", "overall_score", "feedback"],
        "properties": {
            "passed": { "type": "boolean", "description": "true iff all [required] criteria pass" },
            "overall_score": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
            "feedback": { "type": "string", "description": "concise summary; on fail, what to fix next attempt" },
            "details": {
                "type": "array",
                "items": criterion_item
            }
        }
    });

    // The skill property is only advertised when skill extraction is enabled.
    if extract_skill_on_pass {
        schema["properties"]["skill"] = serde_json::json!({
            "type": "object",
            "description": "optional reusable skill distilled from a passing run",
            "required": ["name", "description", "content"],
            "properties": {
                "name": { "type": "string", "description": "snake_case" },
                "description": { "type": "string" },
                "when_to_use": { "type": "string" },
                "content": { "type": "string", "description": "markdown body, no frontmatter" }
            }
        });
    }

    schema
}
/// Parses an evaluator reply into an [`EvalResult`], tolerating missing or malformed fields.
///
/// The JSON object is located with `extract_json`. If the text does not parse as JSON,
/// every field takes its default: `passed = false`, `overall_score = 0.0`,
/// `feedback = "No feedback provided."`, no details and no skill candidate.
///
/// Field defaults:
/// - `passed`: `false` when absent or not a boolean.
/// - `overall_score` / per-item `score`: `1.0` if the matching `passed` is true, else `0.0`.
/// - `details`: entries without a string `criterion` are skipped.
/// - `skill`: kept only when `name`, `description` and `content` are strings and `name`
///   is non-empty; an empty or missing `when_to_use` becomes `None`.
pub fn parse_verdict(content: &str) -> EvalResult {
    /// Reads a boolean field, treating anything else as `false`.
    fn flag(node: &serde_json::Value, key: &str) -> bool {
        node.get(key)
            .and_then(serde_json::Value::as_bool)
            .unwrap_or(false)
    }

    /// Reads a numeric field as `f32`, falling back to 1.0/0.0 depending on `passed`.
    fn score_or(node: &serde_json::Value, key: &str, passed: bool) -> f32 {
        match node.get(key).and_then(serde_json::Value::as_f64) {
            Some(raw) => raw as f32,
            None => {
                if passed {
                    1.0
                } else {
                    0.0
                }
            }
        }
    }

    /// Reads a string field, or `None` when it is absent or not a string.
    fn text<'a>(node: &'a serde_json::Value, key: &str) -> Option<&'a str> {
        node.get(key).and_then(serde_json::Value::as_str)
    }

    // Unparseable input degrades to `Null`, so every lookup below yields its default.
    let root: serde_json::Value =
        serde_json::from_str(extract_json(content)).unwrap_or(serde_json::Value::Null);

    let passed = flag(&root, "passed");
    let overall_score = score_or(&root, "overall_score", passed);
    let feedback = text(&root, "feedback")
        .unwrap_or("No feedback provided.")
        .to_string();

    let mut details = Vec::new();
    if let Some(items) = root.get("details").and_then(serde_json::Value::as_array) {
        for item in items {
            // Entries without a string `criterion` are dropped.
            let Some(criterion) = text(item, "criterion") else {
                continue;
            };
            let item_passed = flag(item, "passed");
            details.push(CriterionResult {
                criterion: criterion.to_string(),
                passed: item_passed,
                score: score_or(item, "score", item_passed),
                feedback: text(item, "feedback").unwrap_or("").to_string(),
            });
        }
    }

    let skill_candidate = root.get("skill").and_then(|skill| {
        match (
            text(skill, "name"),
            text(skill, "description"),
            text(skill, "content"),
        ) {
            (Some(name), Some(description), Some(body)) if !name.is_empty() => {
                Some(SkillCandidate {
                    name: name.to_string(),
                    description: description.to_string(),
                    when_to_use: text(skill, "when_to_use")
                        .filter(|hint| !hint.is_empty())
                        .map(str::to_owned),
                    content: body.to_string(),
                })
            }
            _ => None,
        }
    });

    EvalResult {
        passed,
        overall_score,
        feedback,
        details,
        skill_candidate,
    }
}
/// Returns the slice of `s` spanning the first `{` through the last `}` after it.
///
/// This strips surrounding text such as markdown code fences from a reply. When `s`
/// has no `{`, or no `}` at or after the first `{`, the whole input is returned
/// unchanged so the caller's JSON parser can reject it.
fn extract_json(s: &str) -> &str {
    let Some(start) = s.find('{') else {
        return s;
    };
    // Search for the closing brace only after the opening one: a `}` that appears
    // before the first `{` (e.g. "} {") would otherwise give an inverted range and panic.
    match s[start..].rfind('}') {
        // Both braces are single-byte ASCII, so the inclusive range ends on a char boundary.
        Some(offset) => &s[start..=start + offset],
        None => s,
    }
}
// Unit tests for prompt construction, verdict parsing, criterion constructors and the schema.
#[cfg(test)]
mod tests {
use super::*;
// The prompt has a system and a user message; the user message carries the goal, the
// tagged criterion and the attempt number, and the system message mentions "skill"
// when skill extraction is enabled.
#[test]
fn build_eval_messages_carries_goal_and_criteria() {
let msgs = build_eval_messages(
"Write a function",
&[Criterion::required("Must handle errors")],
"fn foo() {}",
1,
true,
);
assert_eq!(msgs.len(), 2);
assert!(matches!(msgs[0].role, Role::System));
let Content::Text(user) = &msgs[1].content else {
panic!("expected text")
};
assert!(user.contains("Write a function"));
// Note: the tag is rendered directly against the criterion text (no space).
assert!(user.contains("[required]Must handle errors"));
assert!(user.contains("attempt 1"));
let Content::Text(system) = &msgs[0].content else {
panic!("expected text")
};
assert!(system.contains("\"skill\""));
}
// With skill extraction disabled the system prompt must not contain the skill template.
#[test]
fn build_eval_messages_omits_skill_instruction_when_disabled() {
let msgs = build_eval_messages("g", &[], "r", 1, false);
let Content::Text(system) = &msgs[0].content else {
panic!("expected text")
};
assert!(!system.contains("\"name\":\"snake_case\""));
}
// A failing verdict without a "skill" object yields no skill candidate.
#[test]
fn parse_verdict_failed_no_skill() {
let result = parse_verdict(
r#"{"passed":false,"overall_score":0.2,"feedback":"Missing error handling","details":[{"criterion":"Must handle errors","passed":false,"score":0.2,"feedback":"No error handling found"}]}"#,
);
assert!(!result.passed);
assert_eq!(result.feedback, "Missing error handling");
assert_eq!(result.details.len(), 1);
assert!(!result.details[0].passed);
assert!(result.skill_candidate.is_none());
}
// A passing verdict with details and a well-formed "skill" object is parsed in full.
#[test]
fn parse_verdict_passed_with_skill_and_details() {
let json = r#"{"passed":true,"overall_score":0.95,"feedback":"All criteria met","details":[{"criterion":"Must handle errors","passed":true,"score":1.0,"feedback":"Good error handling"}],"skill":{"name":"robust_api_call","description":"How to call APIs with retries","content":"Robust API Call - Always retry on 5xx."}}"#;
let result = parse_verdict(json);
assert!(result.passed);
assert!(result.overall_score > 0.9);
assert_eq!(result.details.len(), 1);
assert!(result.details[0].passed);
let skill = result.skill_candidate.unwrap();
assert_eq!(skill.name, "robust_api_call");
assert!(skill.content.contains("retry"));
}
// JSON wrapped in a markdown code fence is still found and parsed.
#[test]
fn parse_verdict_strips_markdown_fences() {
let result = parse_verdict("```json\n{\"passed\":true,\"feedback\":\"good\"}\n```");
assert!(result.passed);
}
// Converting from a string slice produces a required criterion with weight 1.0.
#[test]
fn criterion_from_string_is_required() {
let c = Criterion::from("some check");
assert!(c.required);
assert!((c.weight - 1.0).abs() < 0.001);
}
// `optional` clears the required flag and `with_weight` overrides the weight.
#[test]
fn optional_criterion_with_weight() {
let c = Criterion::optional("bonus check").with_weight(0.5);
assert!(!c.required);
assert!((c.weight - 0.5).abs() < 0.001);
}
// The schema exposes the core properties, and "skill" only when extraction is enabled.
#[test]
fn verdict_output_schema_shape() {
let schema = verdict_output_schema(true);
assert_eq!(schema["type"], "object");
assert!(schema["properties"]["passed"].is_object());
assert!(schema["properties"]["overall_score"].is_object());
assert!(schema["properties"]["details"].is_object());
assert!(schema["properties"]["skill"].is_object());
let no_skill = verdict_output_schema(false);
// Indexing a missing key on a `serde_json::Value` yields `Null`.
assert!(no_skill["properties"]["skill"].is_null());
}
}