use serde::{Deserialize, Serialize};
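
/// A quality rubric: a named set of weighted criteria and the minimum
/// weighted-average score required to pass.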
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Rubric {
pub id: String,
pub name: String,
pub description: String,
pub criteria: Vec<RubricCriterion>,
pub pass_threshold: f64,
}
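
/// One dimension of quality. Each criterion carries a weight (defaulting to
/// 1.0 when omitted from serialized rubrics) and a set of labeled score bands.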
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RubricCriterion {
pub name: String,
pub description: String,
#[serde(default = "default_weight")]
pub weight: f64,
pub levels: Vec<ScoreLevel>,
}
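
/// Serde default: criteria without an explicit weight count as 1.0.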
fn default_weight() -> f64 { 1.0 }
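
/// A labeled score band (e.g. "excellent", 0.9-1.0) that anchors the evaluator's judgement.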
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoreLevel {
pub min_score: f64,
pub max_score: f64,
pub label: String,
pub description: String,
}
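
/// The result of scoring a work output against a rubric.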
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RubricEvaluation {
pub rubric_id: String,
pub scores: Vec<CriterionScore>,
pub overall_score: f64,
pub passed: bool,
}
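
/// An individual criterion's score, typically parsed from an evaluator model's JSON reply.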
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CriterionScore {
pub criterion: String,
pub score: f64,
pub explanation: String,
pub level_label: String,
}
impl Rubric {
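    /// Computes the weighted average of `scores` and checks it against
    /// `pass_threshold`. Criteria that were never scored contribute zero;
    /// scores whose criterion is not defined in the rubric are ignored.
    ///
    /// Minimal usage sketch (illustrative only; `scores` stands in for values
    /// parsed from an evaluator's JSON reply):
    ///
    /// ```ignore
    /// let rubric = code_review_rubric();
    /// let evaluation = rubric.evaluate(scores);
    /// if !evaluation.passed {
    ///     eprintln!("failed with overall score {:.2}", evaluation.overall_score);
    /// }
    /// ```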
pub fn evaluate(&self, scores: Vec<CriterionScore>) -> RubricEvaluation {
        let total_weight: f64 = self.criteria.iter().map(|c| c.weight).sum();
        // Guard against division by zero when the rubric has no criteria or all
        // weights are zero: report a failing evaluation with a score of 0.0.
        if total_weight == 0.0 {
return RubricEvaluation {
rubric_id: self.id.clone(),
scores,
overall_score: 0.0,
passed: false,
};
}
        // Weight each score by its criterion's weight. Scores referencing a
        // criterion that is not part of the rubric get a weight of 0.0 so they
        // cannot inflate the average (the denominator only counts rubric criteria).
        let weighted_sum: f64 = scores.iter()
            .map(|s| {
                let weight = self.criteria.iter()
                    .find(|c| c.name == s.criterion)
                    .map(|c| c.weight)
                    .unwrap_or(0.0);
                s.score * weight
            })
            .sum();
let overall = weighted_sum / total_weight;
let passed = overall >= self.pass_threshold;
RubricEvaluation {
rubric_id: self.id.clone(),
scores,
overall_score: overall,
passed,
}
}
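
    /// Builds the grading prompt sent to an evaluator model: the rubric's
    /// criteria and score bands, the work output, and instructions to reply
    /// with only a JSON object of per-criterion scores.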
pub fn evaluation_prompt(&self, work_output: &str) -> String {
let criteria_text: Vec<String> = self.criteria.iter()
.map(|c| {
let levels_text: Vec<String> = c.levels.iter()
                    // Two decimals so adjacent bands (e.g. 0.70-0.89 and 0.90-1.00) don't appear to overlap after rounding.
                    .map(|l| format!(" - **{:.2}-{:.2}** ({}): {}", l.min_score, l.max_score, l.label, l.description))
.collect();
format!("### {} (weight: {:.1})\n{}\n{}", c.name, c.weight, c.description, levels_text.join("\n"))
})
.collect();
format!(
r#"Evaluate the following work output against this quality rubric.
## Rubric: {name}
{description}
## Criteria
{criteria}
## Work Output
{work}
## Instructions
Score each criterion from 0.0 to 1.0. Be strict and objective.
Respond with ONLY a JSON object:
```json
{{
"scores": [
{{"criterion": "criterion_name", "score": 0.0, "explanation": "reason", "level_label": "level"}}
]
}}
```"#,
name = self.name,
description = self.description,
criteria = criteria_text.join("\n\n"),
work = work_output,
)
}
}
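
/// A general-purpose code review rubric. Correctness (2.0) and error handling /
/// test coverage (1.5 each) outweigh readability (1.0); passing requires a
/// weighted average of at least 0.7.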
pub fn code_review_rubric() -> Rubric {
Rubric {
id: "code_review".into(),
name: "Code Review".into(),
description: "Evaluates code quality, correctness, and maintainability.".into(),
pass_threshold: 0.7,
criteria: vec![
RubricCriterion {
name: "correctness".into(),
description: "Does the code correctly implement the requirements?".into(),
weight: 2.0,
levels: vec![
ScoreLevel { min_score: 0.9, max_score: 1.0, label: "excellent".into(), description: "Fully correct, handles edge cases.".into() },
ScoreLevel { min_score: 0.7, max_score: 0.89, label: "good".into(), description: "Correct for main cases, minor edge case gaps.".into() },
ScoreLevel { min_score: 0.4, max_score: 0.69, label: "partial".into(), description: "Some functionality works, significant gaps.".into() },
ScoreLevel { min_score: 0.0, max_score: 0.39, label: "failing".into(), description: "Fundamentally broken or incomplete.".into() },
],
},
RubricCriterion {
name: "error_handling".into(),
description: "Are errors handled gracefully? No panics, clear error messages.".into(),
weight: 1.5,
levels: vec![
ScoreLevel { min_score: 0.8, max_score: 1.0, label: "robust".into(), description: "All error paths handled, typed errors.".into() },
ScoreLevel { min_score: 0.5, max_score: 0.79, label: "adequate".into(), description: "Main errors handled, some unwrap/expect.".into() },
ScoreLevel { min_score: 0.0, max_score: 0.49, label: "fragile".into(), description: "Panics on errors, missing error handling.".into() },
],
},
RubricCriterion {
name: "readability".into(),
description: "Is the code clear, well-named, and easy to follow?".into(),
weight: 1.0,
levels: vec![
ScoreLevel { min_score: 0.8, max_score: 1.0, label: "clear".into(), description: "Self-documenting, good naming, logical structure.".into() },
ScoreLevel { min_score: 0.5, max_score: 0.79, label: "acceptable".into(), description: "Generally readable, minor clarity issues.".into() },
ScoreLevel { min_score: 0.0, max_score: 0.49, label: "confusing".into(), description: "Hard to follow, poor naming, tangled logic.".into() },
],
},
RubricCriterion {
name: "test_coverage".into(),
description: "Are there tests? Do they cover important paths?".into(),
weight: 1.5,
levels: vec![
ScoreLevel { min_score: 0.8, max_score: 1.0, label: "thorough".into(), description: "Comprehensive tests, edge cases covered.".into() },
ScoreLevel { min_score: 0.5, max_score: 0.79, label: "basic".into(), description: "Happy path tested, some edge cases.".into() },
ScoreLevel { min_score: 0.0, max_score: 0.49, label: "missing".into(), description: "No tests or trivial-only tests.".into() },
],
},
],
}
}
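
/// A stricter rubric for security-sensitive changes: input validation and
/// secrets management carry the heaviest weights, and the pass threshold is 0.8.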
pub fn security_review_rubric() -> Rubric {
Rubric {
id: "security_review".into(),
name: "Security Review".into(),
description: "Evaluates security posture of code changes.".into(),
pass_threshold: 0.8,
criteria: vec![
RubricCriterion {
name: "input_validation".into(),
description: "Are all external inputs validated and sanitized?".into(),
weight: 2.0,
levels: vec![
ScoreLevel { min_score: 0.8, max_score: 1.0, label: "secure".into(), description: "All inputs validated, parameterized queries, no injection.".into() },
ScoreLevel { min_score: 0.5, max_score: 0.79, label: "partial".into(), description: "Some validation, minor gaps.".into() },
ScoreLevel { min_score: 0.0, max_score: 0.49, label: "vulnerable".into(), description: "Missing validation, injection possible.".into() },
],
},
RubricCriterion {
name: "secrets_management".into(),
description: "Are secrets handled properly? No hardcoded keys.".into(),
weight: 2.0,
levels: vec![
ScoreLevel { min_score: 0.8, max_score: 1.0, label: "secure".into(), description: "Env vars only, no secrets in code or logs.".into() },
ScoreLevel { min_score: 0.0, max_score: 0.79, label: "risky".into(), description: "Hardcoded values or logged secrets.".into() },
],
},
RubricCriterion {
name: "auth_boundaries".into(),
description: "Are trust boundaries enforced? Proper auth checks.".into(),
weight: 1.5,
levels: vec![
ScoreLevel { min_score: 0.8, max_score: 1.0, label: "enforced".into(), description: "All endpoints protected, RBAC applied.".into() },
ScoreLevel { min_score: 0.5, max_score: 0.79, label: "partial".into(), description: "Main endpoints protected, some gaps.".into() },
ScoreLevel { min_score: 0.0, max_score: 0.49, label: "missing".into(), description: "Open endpoints, no auth checks.".into() },
],
},
],
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn rubric_evaluate_passes() {
let rubric = code_review_rubric();
let scores = vec![
CriterionScore { criterion: "correctness".into(), score: 0.9, explanation: "Good".into(), level_label: "excellent".into() },
CriterionScore { criterion: "error_handling".into(), score: 0.8, explanation: "Ok".into(), level_label: "robust".into() },
CriterionScore { criterion: "readability".into(), score: 0.7, explanation: "Clear".into(), level_label: "acceptable".into() },
CriterionScore { criterion: "test_coverage".into(), score: 0.8, explanation: "Good".into(), level_label: "thorough".into() },
];
let eval = rubric.evaluate(scores);
assert!(eval.passed);
assert!(eval.overall_score > 0.7);
}
#[test]
fn rubric_evaluate_fails() {
let rubric = code_review_rubric();
let scores = vec![
CriterionScore { criterion: "correctness".into(), score: 0.3, explanation: "Broken".into(), level_label: "failing".into() },
CriterionScore { criterion: "error_handling".into(), score: 0.2, explanation: "Panics".into(), level_label: "fragile".into() },
CriterionScore { criterion: "readability".into(), score: 0.4, explanation: "Messy".into(), level_label: "confusing".into() },
CriterionScore { criterion: "test_coverage".into(), score: 0.1, explanation: "None".into(), level_label: "missing".into() },
];
let eval = rubric.evaluate(scores);
assert!(!eval.passed);
assert!(eval.overall_score < 0.7);
}
#[test]
fn evaluation_prompt_includes_all_criteria() {
let rubric = code_review_rubric();
let prompt = rubric.evaluation_prompt("fn main() {}");
assert!(prompt.contains("correctness"));
assert!(prompt.contains("error_handling"));
assert!(prompt.contains("readability"));
assert!(prompt.contains("test_coverage"));
assert!(prompt.contains("fn main()"));
}
#[test]
fn security_rubric_higher_threshold() {
let rubric = security_review_rubric();
assert_eq!(rubric.pass_threshold, 0.8);
assert!(rubric.criteria.iter().any(|c| c.name == "secrets_management"));
}
#[test]
fn weighted_scoring() {
let rubric = code_review_rubric();
let scores = vec![
CriterionScore { criterion: "correctness".into(), score: 1.0, explanation: "".into(), level_label: "".into() },
CriterionScore { criterion: "error_handling".into(), score: 0.0, explanation: "".into(), level_label: "".into() },
CriterionScore { criterion: "readability".into(), score: 0.0, explanation: "".into(), level_label: "".into() },
CriterionScore { criterion: "test_coverage".into(), score: 0.0, explanation: "".into(), level_label: "".into() },
];
let eval = rubric.evaluate(scores);
assert!((eval.overall_score - 2.0/6.0).abs() < 0.01);
        assert!(!eval.passed);
    }
}