use serde::{Deserialize, Serialize};
use schemars::JsonSchema;
use zeroclaw_macros::Configurable;
/// Coarse complexity bucket for a user message.
///
/// Produced by [`estimate_complexity`] and consumed by [`evaluate_response`],
/// which uses it to pick a minimum response length and the tier to escalate to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplexityTier {
/// Assigned by `estimate_complexity` to messages shorter than 50 bytes that
/// contain no reasoning keyword.
Simple,
/// Assigned by `estimate_complexity` to anything that is neither `Simple`
/// nor `Complex`.
Standard,
/// Assigned by `estimate_complexity` to messages longer than 200 bytes,
/// messages containing a triple-backtick code fence, or messages matching
/// two or more reasoning keywords.
Complex,
}
// Lowercase words and phrases that `estimate_complexity` looks for as plain
// substrings of the lowercased message. Each entry that occurs counts once,
// no matter how many times it appears. Spelling variants such as "trade-off"
// and "tradeoff" are separate entries and are counted independently.
const REASONING_KEYWORDS: &[&str] = &[
"explain",
"why",
"analyze",
"compare",
"design",
"implement",
"refactor",
"debug",
"optimize",
"architecture",
"trade-off",
"tradeoff",
"reasoning",
"step by step",
"think through",
"evaluate",
"critique",
"pros and cons",
];
/// Classifies `message` into a [`ComplexityTier`] using cheap string heuristics.
///
/// Rules, applied in this order:
/// 1. `Complex` when the message is longer than 200 bytes, contains a
///    triple-backtick code fence, or matches at least two entries of
///    `REASONING_KEYWORDS`.
/// 2. `Simple` when the message is shorter than 50 bytes and matches no keyword.
/// 3. `Standard` in every other case.
///
/// Lengths are `str::len` byte counts, not character counts. Keyword matching
/// is a case-insensitive substring search, so a keyword embedded in a longer
/// word still counts.
pub fn estimate_complexity(message: &str) -> ComplexityTier {
    let byte_len = message.len();
    let normalized = message.to_lowercase();

    // Count distinct keywords present; repeated occurrences of one keyword
    // still contribute a single hit.
    let mut keyword_hits = 0usize;
    for keyword in REASONING_KEYWORDS {
        if normalized.contains(*keyword) {
            keyword_hits += 1;
        }
    }

    let fenced = message.contains("```");
    let is_complex = fenced || byte_len > 200 || keyword_hits >= 2;
    let is_simple = keyword_hits == 0 && byte_len < 50;

    if is_complex {
        ComplexityTier::Complex
    } else if is_simple {
        ComplexityTier::Simple
    } else {
        ComplexityTier::Standard
    }
}
/// Per-tier hint strings, looked up through [`AutoClassifyConfig::hint_for`].
///
/// Every field may be omitted when deserializing: the three tier hints fall
/// back to `None` and `cost_optimized_hint` falls back to `"cost-optimized"`.
// NOTE(review): what a "hint" selects (presumably a model or route) is not
// visible in this file — confirm against the code that consumes these values.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Configurable)]
#[prefix = "agent.auto-classify"]
pub struct AutoClassifyConfig {
/// Hint returned for [`ComplexityTier::Simple`]; `None` when not configured.
#[serde(default)]
pub simple_hint: Option<String>,
/// Hint returned for [`ComplexityTier::Standard`]; `None` when not configured.
#[serde(default)]
pub standard_hint: Option<String>,
/// Hint returned for [`ComplexityTier::Complex`]; `None` when not configured.
#[serde(default)]
pub complex_hint: Option<String>,
/// Hint string that defaults to `"cost-optimized"` when omitted.
// NOTE(review): this field is not read anywhere in this file; its consumer
// lives elsewhere — confirm intended use there.
#[serde(default = "default_cost_optimized_hint")]
pub cost_optimized_hint: String,
}
/// Default value for `AutoClassifyConfig::cost_optimized_hint`, used both by
/// serde when the field is omitted and by the struct's `Default` impl.
fn default_cost_optimized_hint() -> String {
    String::from("cost-optimized")
}
impl Default for AutoClassifyConfig {
    /// Builds a config with no per-tier hints and the stock cost-optimized
    /// hint, matching what serde produces when every field is omitted.
    fn default() -> Self {
        let cost_optimized_hint = default_cost_optimized_hint();
        Self {
            simple_hint: None,
            standard_hint: None,
            complex_hint: None,
            cost_optimized_hint,
        }
    }
}
impl AutoClassifyConfig {
    /// Returns the hint configured for `tier`, borrowed from `self`, or `None`
    /// when that tier has no hint set.
    pub fn hint_for(&self, tier: ComplexityTier) -> Option<&str> {
        // Pick the matching field first, then borrow its contents once.
        let configured = match tier {
            ComplexityTier::Simple => &self.simple_hint,
            ComplexityTier::Standard => &self.standard_hint,
            ComplexityTier::Complex => &self.complex_hint,
        };
        configured.as_deref()
    }
}
/// Settings for response evaluation.
///
/// Every field may be omitted when deserializing; the fallbacks are
/// `enabled = false`, `min_quality_score = 0.5` and `max_retries = 1`.
// NOTE(review): nothing in this file reads these fields. In particular,
// `evaluate_response` compares the score against `default_min_quality_score()`
// rather than a configured `min_quality_score` — confirm the caller applies
// the configured threshold itself.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Configurable)]
#[prefix = "agent.eval"]
pub struct EvalConfig {
/// Whether evaluation is turned on. Defaults to `false`.
#[serde(default)]
pub enabled: bool,
/// Minimum acceptable quality score. Defaults to `0.5`.
#[serde(default = "default_min_quality_score")]
pub min_quality_score: f64,
/// Maximum number of retries. Defaults to `1`.
#[serde(default = "default_max_retries")]
pub max_retries: u32,
}
/// Default for `EvalConfig::min_quality_score`; also the threshold that
/// `evaluate_response` uses when deciding whether to suggest a retry hint.
fn default_min_quality_score() -> f64 {
    const DEFAULT_THRESHOLD: f64 = 0.5;
    DEFAULT_THRESHOLD
}
/// Default for `EvalConfig::max_retries`: a single retry.
fn default_max_retries() -> u32 {
    const DEFAULT_RETRIES: u32 = 1;
    DEFAULT_RETRIES
}
impl Default for EvalConfig {
    /// Builds a disabled config with the stock threshold and retry count,
    /// matching what serde produces when every field is omitted.
    fn default() -> Self {
        let min_quality_score = default_min_quality_score();
        let max_retries = default_max_retries();
        Self {
            enabled: false,
            min_quality_score,
            max_retries,
        }
    }
}
/// Outcome of [`evaluate_response`].
#[derive(Debug, Clone)]
pub struct EvalResult {
/// Weight of the passing checks divided by the weight of all checks
/// (`1.0` if the total weight is not positive).
pub score: f64,
/// The individual checks that were run, in evaluation order.
pub checks: Vec<EvalCheck>,
/// Hint configured for the next tier up. Set only when the score is at or
/// below the default quality threshold, a higher tier exists, and a hint
/// is configured for it.
pub retry_hint: Option<String>,
}
/// One weighted pass/fail heuristic recorded by [`evaluate_response`].
#[derive(Debug, Clone)]
pub struct EvalCheck {
/// Stable identifier of the check (for example `"non_empty"`).
pub name: &'static str,
/// Whether the response satisfied this check.
pub passed: bool,
/// Contribution of this check to the total score weight.
pub weight: f64,
}
// Lowercase substrings that mark a query as code-related. When any of them
// occurs in the lowercased query, `evaluate_response` expects the response to
// contain code and fails the "code_presence" check otherwise.
const CODE_KEYWORDS: &[&str] = &[
"code",
"function",
"implement",
"class",
"struct",
"module",
"script",
"program",
"bug",
"error",
"compile",
"syntax",
"refactor",
];
/// Scores `response` against four cheap heuristic checks and, for low scores,
/// suggests the hint configured for the next complexity tier.
///
/// Checks (name — weight — passes when):
/// - `non_empty` — 0.3 — the response contains non-whitespace text.
/// - `not_cop_out` — 0.25 — the response does not open with a known cop-out
///   phrase (case and leading whitespace ignored), or is longer than 200 bytes.
/// - `sufficient_length` — 0.2 — the response is at least 5 / 20 / 50 bytes
///   long for `Simple` / `Standard` / `Complex`.
/// - `code_presence` — 0.25 — the query matches no entry of `CODE_KEYWORDS`,
///   or the response contains a fenced or indented code block.
///
/// # Arguments
/// * `query` - the user message the response answers.
/// * `response` - the text being evaluated.
/// * `complexity` - tier of the query; selects the minimum length and the
///   tier to escalate to.
/// * `auto_classify` - source of per-tier hints; with `None` no retry hint is
///   ever produced.
///
/// # Returns
/// An [`EvalResult`] whose `score` is the passing weight divided by the total
/// weight. `retry_hint` is set only when the score is at or below
/// `default_min_quality_score()`, `complexity` is not already `Complex`, and
/// `auto_classify` has a hint for the next tier. The threshold is the built-in
/// default because no `EvalConfig` is passed to this function.
pub fn evaluate_response(
    query: &str,
    response: &str,
    complexity: ComplexityTier,
    auto_classify: Option<&AutoClassifyConfig>,
) -> EvalResult {
    let mut checks = Vec::with_capacity(4);

    // Check 1: the response has visible content.
    let non_empty = !response.trim().is_empty();
    checks.push(EvalCheck {
        name: "non_empty",
        passed: non_empty,
        weight: 0.3,
    });

    // Check 2: the response does not open with a refusal or hedge. Leading
    // whitespace is trimmed first so a stray newline or space before the
    // phrase cannot hide it from `starts_with`.
    let lower_resp = response.trim_start().to_lowercase();
    let cop_out_phrases = [
        "i don't know",
        "i'm not sure",
        "i cannot",
        "i can't help",
        "as an ai",
    ];
    let is_cop_out = cop_out_phrases
        .iter()
        .any(|phrase| lower_resp.starts_with(phrase));
    // A long answer that merely opens with a hedge is not treated as a cop-out.
    let not_cop_out = !is_cop_out || response.len() > 200;
    checks.push(EvalCheck {
        name: "not_cop_out",
        passed: not_cop_out,
        weight: 0.25,
    });

    // Check 3: the response meets the minimum byte length for its tier.
    let min_len = match complexity {
        ComplexityTier::Simple => 5,
        ComplexityTier::Standard => 20,
        ComplexityTier::Complex => 50,
    };
    let sufficient_length = response.len() >= min_len;
    checks.push(EvalCheck {
        name: "sufficient_length",
        passed: sufficient_length,
        weight: 0.2,
    });

    // Check 4: a code-related query should be answered with actual code.
    let query_lower = query.to_lowercase();
    let expects_code = CODE_KEYWORDS.iter().any(|kw| query_lower.contains(kw));
    let code_check_passed = !expects_code || response_has_code_block(response);
    checks.push(EvalCheck {
        name: "code_presence",
        passed: code_check_passed,
        weight: 0.25,
    });

    let total_weight: f64 = checks.iter().map(|c| c.weight).sum();
    let earned: f64 = checks.iter().filter(|c| c.passed).map(|c| c.weight).sum();
    let score = if total_weight > 0.0 {
        earned / total_weight
    } else {
        1.0
    };

    let retry_hint = if score <= default_min_quality_score() {
        let next_tier = match complexity {
            ComplexityTier::Simple => Some(ComplexityTier::Standard),
            ComplexityTier::Standard => Some(ComplexityTier::Complex),
            // Already at the top tier: nothing to escalate to.
            ComplexityTier::Complex => None,
        };
        next_tier.and_then(|tier| {
            auto_classify
                .and_then(|ac| ac.hint_for(tier))
                .map(String::from)
        })
    } else {
        None
    };

    EvalResult {
        score,
        checks,
        retry_hint,
    }
}

/// Returns `true` when `response` contains a triple-backtick fence or a
/// non-blank line indented by at least four spaces or a tab.
///
/// Testing for a single space anywhere would match nearly every response, so
/// indentation is checked at the start of each line instead.
fn response_has_code_block(response: &str) -> bool {
    response.contains("```")
        || response.lines().any(|line| {
            (line.starts_with("    ") || line.starts_with('\t')) && !line.trim().is_empty()
        })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Config with a hint set for every tier; shared by the retry-hint tests.
    fn fully_hinted_config() -> AutoClassifyConfig {
        AutoClassifyConfig {
            simple_hint: Some(String::from("fast")),
            standard_hint: Some(String::from("default")),
            complex_hint: Some(String::from("reasoning")),
            ..AutoClassifyConfig::default()
        }
    }

    // ---- estimate_complexity ----

    #[test]
    fn simple_short_message() {
        let tiers: Vec<ComplexityTier> = ["hi", "hello", "yes"]
            .iter()
            .map(|greeting| estimate_complexity(greeting))
            .collect();
        assert_eq!(tiers, vec![ComplexityTier::Simple; 3]);
    }

    #[test]
    fn complex_long_message() {
        let oversized = "a".repeat(201);
        assert_eq!(estimate_complexity(&oversized), ComplexityTier::Complex);
    }

    #[test]
    fn complex_code_fence() {
        let fenced = "Here is some code:\n```rust\nfn main() {}\n```";
        assert_eq!(estimate_complexity(fenced), ComplexityTier::Complex);
    }

    #[test]
    fn complex_multiple_reasoning_keywords() {
        let tier = estimate_complexity(
            "Please explain why this design is better and analyze the trade-off",
        );
        assert_eq!(tier, ComplexityTier::Complex);
    }

    #[test]
    fn standard_medium_message() {
        let tier =
            estimate_complexity("Can you help me find a good restaurant in this area please?");
        assert_eq!(tier, ComplexityTier::Standard);
    }

    #[test]
    fn standard_short_with_one_keyword() {
        assert_eq!(estimate_complexity("explain this"), ComplexityTier::Standard);
    }

    // ---- AutoClassifyConfig ----

    #[test]
    fn auto_classify_maps_tiers_to_hints() {
        let config = AutoClassifyConfig {
            simple_hint: Some(String::from("fast")),
            standard_hint: None,
            complex_hint: Some(String::from("reasoning")),
            ..AutoClassifyConfig::default()
        };
        assert_eq!(config.hint_for(ComplexityTier::Simple), Some("fast"));
        assert_eq!(config.hint_for(ComplexityTier::Standard), None);
        assert_eq!(config.hint_for(ComplexityTier::Complex), Some("reasoning"));
    }

    // ---- evaluate_response ----

    #[test]
    fn empty_response_scores_low() {
        let outcome = evaluate_response("hello", "", ComplexityTier::Simple, None);
        assert!(outcome.score <= 0.5, "empty response should score low");
    }

    #[test]
    fn good_response_scores_high() {
        let outcome = evaluate_response(
            "what is 2+2?",
            "The answer is 4.",
            ComplexityTier::Simple,
            None,
        );
        let score = outcome.score;
        assert!(
            score >= 0.9,
            "good simple response should score high, got {}",
            score
        );
    }

    #[test]
    fn cop_out_response_penalized() {
        let outcome = evaluate_response(
            "explain quantum computing",
            "I don't know much about that.",
            ComplexityTier::Standard,
            None,
        );
        let score = outcome.score;
        assert!(score < 1.0, "cop-out should be penalized, got {}", score);
    }

    #[test]
    fn code_query_without_code_response_penalized() {
        let outcome = evaluate_response(
            "write a function to sort an array",
            "You should use a sorting algorithm.",
            ComplexityTier::Standard,
            None,
        );
        let code_check = outcome
            .checks
            .iter()
            .find(|check| check.name == "code_presence");
        // The check must exist and must have failed.
        assert!(
            matches!(code_check, Some(check) if !check.passed),
            "code check should fail"
        );
    }

    #[test]
    fn retry_hint_escalation() {
        let config = fully_hinted_config();
        let outcome = evaluate_response("hello", "", ComplexityTier::Simple, Some(&config));
        assert_eq!(outcome.retry_hint.as_deref(), Some("default"));
    }

    #[test]
    fn no_retry_when_already_complex() {
        let config = fully_hinted_config();
        let outcome = evaluate_response(
            "explain everything",
            "",
            ComplexityTier::Complex,
            Some(&config),
        );
        assert_eq!(outcome.retry_hint, None);
    }

    // ---- defaults ----

    #[test]
    fn max_retries_defaults() {
        let defaults = EvalConfig::default();
        assert!(!defaults.enabled);
        assert_eq!(defaults.max_retries, 1);
        assert!((defaults.min_quality_score - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn cost_optimized_hint_default() {
        let defaults = AutoClassifyConfig::default();
        assert_eq!(defaults.cost_optimized_hint, "cost-optimized");
    }
}