#![forbid(unsafe_code)]
#![cfg(feature = "evaluator-quality")]
use std::sync::Arc;
use crate::evaluator::Evaluator;
use crate::types::{AssertionKind, EvalCase, EvalMetricResult, Invocation};
use super::{JudgeEvaluatorConfig, build_prompt_context, evaluate_with_builtin};
// Generates a judge-based quality evaluator type.
//
// Arguments, in order:
//   * optional attributes (including doc comments) forwarded onto the struct,
//   * the struct name,
//   * the evaluator name literal returned by `Evaluator::name`,
//   * the template literal handed to `evaluate_with_builtin`,
//   * a criterion expression coercible to `fn(&EvalCase, &Invocation) -> bool`;
//     when it returns `false` the generated evaluator yields `None`.
macro_rules! simple_quality_evaluator {
    (
        $(#[$meta:meta])*
        $name:ident, $eval_name:literal, $template:literal, $criterion:expr
    ) => {
        $(#[$meta])*
        pub struct $name {
            config: JudgeEvaluatorConfig,
        }

        impl $name {
            /// Wraps the given judge configuration in a new evaluator.
            #[must_use]
            pub const fn new(config: JudgeEvaluatorConfig) -> Self {
                Self { config }
            }

            /// Returns the evaluator with its configuration replaced by the
            /// result of `JudgeEvaluatorConfig::with_prompt`.
            #[must_use]
            pub fn with_prompt(self, template: Arc<dyn crate::prompt::JudgePromptTemplate>) -> Self {
                Self {
                    config: self.config.with_prompt(template),
                }
            }

            /// Returns the evaluator with its configuration replaced by the
            /// result of `JudgeEvaluatorConfig::with_few_shot`.
            #[must_use]
            pub fn with_few_shot(self, examples: Vec<crate::types::FewShotExample>) -> Self {
                Self {
                    config: self.config.with_few_shot(examples),
                }
            }

            /// Returns the evaluator with its configuration replaced by the
            /// result of `JudgeEvaluatorConfig::with_system_prompt`.
            #[must_use]
            pub fn with_system_prompt(self, prompt: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_system_prompt(prompt),
                }
            }

            /// Returns the evaluator with its configuration replaced by the
            /// result of `JudgeEvaluatorConfig::with_output_schema`.
            #[must_use]
            pub fn with_output_schema(self, schema: serde_json::Value) -> Self {
                Self {
                    config: self.config.with_output_schema(schema),
                }
            }

            /// Returns the evaluator with its configuration replaced by the
            /// result of `JudgeEvaluatorConfig::with_use_reasoning`.
            #[must_use]
            pub fn with_use_reasoning(self, flag: bool) -> Self {
                Self {
                    config: self.config.with_use_reasoning(flag),
                }
            }

            /// Returns the evaluator with its configuration replaced by the
            /// result of `JudgeEvaluatorConfig::with_feedback_key`.
            #[must_use]
            pub fn with_feedback_key(self, key: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_feedback_key(key),
                }
            }

            /// Borrows the judge configuration held by this evaluator.
            #[must_use]
            pub const fn config(&self) -> &JudgeEvaluatorConfig {
                &self.config
            }
        }

        impl $crate::evaluators::JudgeEvaluatorBuilder for $name {
            fn judge_config_mut(&mut self) -> &mut JudgeEvaluatorConfig {
                &mut self.config
            }
        }

        impl Evaluator for $name {
            fn name(&self) -> &'static str {
                $eval_name
            }

            fn evaluate(
                &self,
                case: &EvalCase,
                invocation: &Invocation,
            ) -> Option<EvalMetricResult> {
                // Binding to an explicit fn-pointer type pins the signature
                // every criterion expression has to satisfy.
                let applies: fn(&EvalCase, &Invocation) -> bool = $criterion;

                // The prompt context is only built when the criterion holds.
                applies(case, invocation).then(|| {
                    let context = build_prompt_context(&self.config, case, invocation);
                    evaluate_with_builtin($eval_name, $template, &self.config, &context)
                })
            }
        }
    };
}
/// Returns `true` when the invocation has a final response containing at
/// least one non-whitespace character.
///
/// `_case` is unused; the parameter is kept so the function matches the
/// `fn(&EvalCase, &Invocation) -> bool` criterion signature.
fn has_final_response(_case: &EvalCase, invocation: &Invocation) -> bool {
    matches!(
        invocation.final_response.as_deref(),
        Some(text) if !text.trim().is_empty()
    )
}
/// Returns `true` when the case has at least one user message and the
/// invocation has a non-blank final response.
fn has_user_prompt_and_response(case: &EvalCase, invocation: &Invocation) -> bool {
    if case.user_messages.is_empty() {
        return false;
    }
    has_final_response(case, invocation)
}
/// Returns `true` when the case's system prompt is not blank (after trimming
/// whitespace) and the invocation has a non-blank final response.
fn has_system_prompt_and_response(case: &EvalCase, invocation: &Invocation) -> bool {
    if case.system_prompt.trim().is_empty() {
        return false;
    }
    has_final_response(case, invocation)
}
/// Returns `true` when the invocation has a non-blank final response and the
/// case carries at least one few-shot example.
///
/// NOTE(review): the name refers to "retrieved context", but the check is on
/// `case.few_shot_examples` — confirm that field is where the context lives.
fn has_retrieved_context(case: &EvalCase, invocation: &Invocation) -> bool {
    if !has_final_response(case, invocation) {
        return false;
    }
    !case.few_shot_examples.is_empty()
}
simple_quality_evaluator! {
/// Judge-based evaluator named `helpfulness`, scored with the built-in
/// `helpfulness_v0` template.
///
/// Yields no result unless the case has at least one user message and the
/// invocation has a non-blank final response.
HelpfulnessEvaluator,
"helpfulness",
"helpfulness_v0",
has_user_prompt_and_response
}
simple_quality_evaluator! {
/// Judge-based evaluator named `correctness`, scored with the built-in
/// `correctness_v0` template.
///
/// Yields no result unless the case has at least one user message and the
/// invocation has a non-blank final response.
CorrectnessEvaluator,
"correctness",
"correctness_v0",
has_user_prompt_and_response
}
simple_quality_evaluator! {
/// Judge-based evaluator named `conciseness`, scored with the built-in
/// `conciseness_v0` template.
///
/// Yields no result unless the invocation has a non-blank final response.
ConcisenessEvaluator,
"conciseness",
"conciseness_v0",
has_final_response
}
simple_quality_evaluator! {
/// Judge-based evaluator named `coherence`, scored with the built-in
/// `coherence_v0` template.
///
/// Yields no result unless the invocation has a non-blank final response.
CoherenceEvaluator,
"coherence",
"coherence_v0",
has_final_response
}
simple_quality_evaluator! {
/// Judge-based evaluator named `response_relevance`, scored with the built-in
/// `response_relevance_v0` template.
///
/// Yields no result unless the case has at least one user message and the
/// invocation has a non-blank final response.
ResponseRelevanceEvaluator,
"response_relevance",
"response_relevance_v0",
has_user_prompt_and_response
}
simple_quality_evaluator! {
/// Judge-based evaluator named `hallucination`, scored with the built-in
/// `hallucination_v0` template.
///
/// Yields no result unless the case has at least one user message and the
/// invocation has a non-blank final response.
HallucinationEvaluator,
"hallucination",
"hallucination_v0",
has_user_prompt_and_response
}
simple_quality_evaluator! {
/// Judge-based evaluator named `faithfulness`, scored with the built-in
/// `faithfulness_v0` template.
///
/// Yields no result unless `has_retrieved_context` holds, i.e. the invocation
/// has a non-blank final response and the case has at least one few-shot
/// example.
FaithfulnessEvaluator,
"faithfulness",
"faithfulness_v0",
has_retrieved_context
}
simple_quality_evaluator! {
/// Judge-based evaluator named `plan_adherence`, scored with the built-in
/// `plan_adherence_v0` template.
///
/// Yields no result unless the case has a non-blank system prompt and the
/// invocation has a non-blank final response.
PlanAdherenceEvaluator,
"plan_adherence",
"plan_adherence_v0",
has_system_prompt_and_response
}
simple_quality_evaluator! {
/// Judge-based evaluator named `laziness`, scored with the built-in
/// `laziness_v0` template.
///
/// Yields no result unless the case has at least one user message and the
/// invocation has a non-blank final response.
LazinessEvaluator,
"laziness",
"laziness_v0",
has_user_prompt_and_response
}
/// Judge-based evaluator reported under the name `goal_success_rate`.
///
/// Unlike the macro-generated evaluators in this module it is written by
/// hand, because its gating also requires the case to carry an expected
/// assertion.
pub struct GoalSuccessRateEvaluator {
// Judge configuration passed to `evaluate_with_builtin` on every evaluation.
config: JudgeEvaluatorConfig,
}
impl GoalSuccessRateEvaluator {
    /// Wraps the given judge configuration in a new evaluator.
    #[must_use]
    pub const fn new(config: JudgeEvaluatorConfig) -> Self {
        Self { config }
    }

    /// Returns the evaluator with its configuration replaced by the result of
    /// `JudgeEvaluatorConfig::with_prompt`.
    #[must_use]
    pub fn with_prompt(self, template: Arc<dyn crate::prompt::JudgePromptTemplate>) -> Self {
        Self {
            config: self.config.with_prompt(template),
        }
    }

    /// Returns the evaluator with its configuration replaced by the result of
    /// `JudgeEvaluatorConfig::with_few_shot`.
    #[must_use]
    pub fn with_few_shot(self, examples: Vec<crate::types::FewShotExample>) -> Self {
        Self {
            config: self.config.with_few_shot(examples),
        }
    }

    /// Returns the evaluator with its configuration replaced by the result of
    /// `JudgeEvaluatorConfig::with_system_prompt`.
    #[must_use]
    pub fn with_system_prompt(self, prompt: impl Into<String>) -> Self {
        Self {
            config: self.config.with_system_prompt(prompt),
        }
    }

    /// Returns the evaluator with its configuration replaced by the result of
    /// `JudgeEvaluatorConfig::with_output_schema`.
    #[must_use]
    pub fn with_output_schema(self, schema: serde_json::Value) -> Self {
        Self {
            config: self.config.with_output_schema(schema),
        }
    }

    /// Returns the evaluator with its configuration replaced by the result of
    /// `JudgeEvaluatorConfig::with_use_reasoning`.
    #[must_use]
    pub fn with_use_reasoning(self, flag: bool) -> Self {
        Self {
            config: self.config.with_use_reasoning(flag),
        }
    }

    /// Returns the evaluator with its configuration replaced by the result of
    /// `JudgeEvaluatorConfig::with_feedback_key`.
    #[must_use]
    pub fn with_feedback_key(self, key: impl Into<String>) -> Self {
        Self {
            config: self.config.with_feedback_key(key),
        }
    }

    /// Borrows the judge configuration held by this evaluator.
    #[must_use]
    pub const fn config(&self) -> &JudgeEvaluatorConfig {
        &self.config
    }
}
impl crate::evaluators::JudgeEvaluatorBuilder for GoalSuccessRateEvaluator {
    /// Hands out mutable access to the stored judge configuration.
    fn judge_config_mut(&mut self) -> &mut JudgeEvaluatorConfig {
        let Self { config } = self;
        config
    }
}
impl Evaluator for GoalSuccessRateEvaluator {
    /// Name under which this evaluator reports its metric.
    fn name(&self) -> &'static str {
        "goal_success_rate"
    }

    /// Scores the invocation with the built-in `goal_success_rate_v0`
    /// template.
    ///
    /// Returns `None` when the case has no expected assertion or when the
    /// invocation lacks a non-blank final response. Only the presence of the
    /// assertion is checked in this function; its contents are not inspected
    /// here.
    fn evaluate(&self, case: &EvalCase, invocation: &Invocation) -> Option<EvalMetricResult> {
        if case.expected_assertion.is_none() || !has_final_response(case, invocation) {
            return None;
        }

        let context = build_prompt_context(&self.config, case, invocation);
        Some(evaluate_with_builtin(
            "goal_success_rate",
            "goal_success_rate_v0",
            &self.config,
            &context,
        ))
    }
}
/// Returns `true` when the case carries an expected assertion whose kind is
/// either `AssertionKind::GoalCompleted` or `AssertionKind::Custom`.
///
/// Cases without an expected assertion, or with any other assertion kind,
/// yield `false`.
#[must_use]
pub fn assertion_implies_goal_completion(case: &EvalCase) -> bool {
    case.expected_assertion.as_ref().is_some_and(|assertion| {
        matches!(
            assertion.kind,
            AssertionKind::GoalCompleted | AssertionKind::Custom { .. }
        )
    })
}