#![forbid(unsafe_code)]
#![cfg(feature = "evaluator-agent")]
use std::sync::Arc;
use crate::evaluator::Evaluator;
use crate::types::{EvalCase, EvalMetricResult, Invocation};
use super::{JudgeEvaluatorConfig, build_prompt_context, evaluate_with_builtin};
/// Reports whether `invocation` carries a usable final response.
///
/// Returns `false` when `final_response` is absent or is empty once trimmed;
/// returns `true` otherwise.
fn has_final_response(invocation: &Invocation) -> bool {
    match invocation.final_response.as_deref() {
        Some(text) => !text.trim().is_empty(),
        None => false,
    }
}
/// Reports whether `case.user_messages` is non-empty.
///
/// Only emptiness of the collection is checked; the contents of the messages
/// are not inspected.
fn has_user_prompt(case: &EvalCase) -> bool {
    let no_messages = case.user_messages.is_empty();
    !no_messages
}
// Generates one judge-backed evaluator type per invocation.
//
// Arguments, in order:
// - optional attributes (including `///` docs), forwarded onto the struct;
// - `$name`: identifier of the generated struct;
// - `$eval_name`: literal returned by `Evaluator::name` and passed as the
//   first argument to `evaluate_with_builtin`;
// - `$template`: literal passed as the second argument to
//   `evaluate_with_builtin` (presumably the built-in prompt template id;
//   confirm against that helper);
// - `$criterion`: expression coerced to `fn(&EvalCase, &Invocation) -> bool`;
//   when it yields `false`, `evaluate` returns `None` without calling
//   `evaluate_with_builtin`.
macro_rules! agent_evaluator {
    (
        $(#[$meta:meta])*
        $name:ident, $eval_name:literal, $template:literal, $criterion:expr
    ) => {
        $(#[$meta])*
        pub struct $name {
            config: JudgeEvaluatorConfig,
        }

        impl $name {
            /// Creates the evaluator from an already-built judge configuration.
            #[must_use]
            pub const fn new(config: JudgeEvaluatorConfig) -> Self {
                Self { config }
            }

            /// Returns the evaluator with its configuration passed through
            /// `JudgeEvaluatorConfig::with_prompt`.
            #[must_use]
            pub fn with_prompt(self, template: Arc<dyn crate::prompt::JudgePromptTemplate>) -> Self {
                Self {
                    config: self.config.with_prompt(template),
                }
            }

            /// Returns the evaluator with its configuration passed through
            /// `JudgeEvaluatorConfig::with_few_shot`.
            #[must_use]
            pub fn with_few_shot(self, examples: Vec<crate::types::FewShotExample>) -> Self {
                Self {
                    config: self.config.with_few_shot(examples),
                }
            }

            /// Returns the evaluator with its configuration passed through
            /// `JudgeEvaluatorConfig::with_system_prompt`.
            #[must_use]
            pub fn with_system_prompt(self, prompt: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_system_prompt(prompt),
                }
            }

            /// Returns the evaluator with its configuration passed through
            /// `JudgeEvaluatorConfig::with_output_schema`.
            #[must_use]
            pub fn with_output_schema(self, schema: serde_json::Value) -> Self {
                Self {
                    config: self.config.with_output_schema(schema),
                }
            }

            /// Returns the evaluator with its configuration passed through
            /// `JudgeEvaluatorConfig::with_use_reasoning`.
            #[must_use]
            pub fn with_use_reasoning(self, flag: bool) -> Self {
                Self {
                    config: self.config.with_use_reasoning(flag),
                }
            }

            /// Returns the evaluator with its configuration passed through
            /// `JudgeEvaluatorConfig::with_feedback_key`.
            #[must_use]
            pub fn with_feedback_key(self, key: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_feedback_key(key),
                }
            }

            /// Borrows the judge configuration held by this evaluator.
            #[must_use]
            pub const fn config(&self) -> &JudgeEvaluatorConfig {
                &self.config
            }
        }

        impl $crate::evaluators::JudgeEvaluatorBuilder for $name {
            fn judge_config_mut(&mut self) -> &mut JudgeEvaluatorConfig {
                &mut self.config
            }
        }

        impl Evaluator for $name {
            fn name(&self) -> &'static str {
                $eval_name
            }

            fn evaluate(
                &self,
                case: &EvalCase,
                invocation: &Invocation,
            ) -> Option<EvalMetricResult> {
                // Binding to a fn pointer forces the criterion to be a
                // non-capturing closure or a plain function.
                let should_run: fn(&EvalCase, &Invocation) -> bool = $criterion;

                // `then` is lazy: the prompt context is only built and the
                // judge only invoked when the criterion holds.
                should_run(case, invocation).then(|| {
                    let context = build_prompt_context(&self.config, case, invocation);
                    evaluate_with_builtin($eval_name, $template, &self.config, &context)
                })
            }
        }
    };
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `trajectory_accuracy`, using the
    /// `trajectory_accuracy_v0` template literal.
    ///
    /// Produces a result only when the case has user messages and the
    /// invocation has a non-blank final response.
    TrajectoryAccuracyEvaluator,
    "trajectory_accuracy",
    "trajectory_accuracy_v0",
    |eval_case, run| {
        let prompted = has_user_prompt(eval_case);
        prompted && has_final_response(run)
    }
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `trajectory_accuracy_with_ref`,
    /// using the `trajectory_accuracy_with_ref_v0` template literal.
    ///
    /// Produces a result only when the case sets `expected_trajectory`, has
    /// user messages, and the invocation has a non-blank final response.
    TrajectoryAccuracyWithRefEvaluator,
    "trajectory_accuracy_with_ref",
    "trajectory_accuracy_with_ref_v0",
    |eval_case, run| {
        let has_reference = eval_case.expected_trajectory.is_some();
        has_reference && has_user_prompt(eval_case) && has_final_response(run)
    }
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `task_completion`, using the
    /// `task_completion_v0` template literal.
    ///
    /// Produces a result only when the case sets `expected_assertion` and the
    /// invocation has a non-blank final response. User messages are not
    /// required by this criterion.
    TaskCompletionEvaluator,
    "task_completion",
    "task_completion_v0",
    |eval_case, run| {
        let has_assertion = eval_case.expected_assertion.is_some();
        has_assertion && has_final_response(run)
    }
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `user_satisfaction`, using the
    /// `user_satisfaction_v0` template literal.
    ///
    /// Produces a result only when the case has user messages and the
    /// invocation has a non-blank final response.
    UserSatisfactionEvaluator,
    "user_satisfaction",
    "user_satisfaction_v0",
    |eval_case, run| {
        let prompted = has_user_prompt(eval_case);
        prompted && has_final_response(run)
    }
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `agent_tone`, using the
    /// `agent_tone_v0` template literal.
    ///
    /// Produces a result whenever the invocation has a non-blank final
    /// response; the case itself is not consulted by the criterion.
    AgentToneEvaluator,
    "agent_tone",
    "agent_tone_v0",
    |_, run| has_final_response(run)
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `knowledge_retention`, using the
    /// `knowledge_retention_v0` template literal.
    ///
    /// Produces a result only when the case has user messages and the
    /// invocation has a non-blank final response.
    KnowledgeRetentionEvaluator,
    "knowledge_retention",
    "knowledge_retention_v0",
    |eval_case, run| {
        let prompted = has_user_prompt(eval_case);
        prompted && has_final_response(run)
    }
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `language_detection`, using the
    /// `language_detection_v0` template literal.
    ///
    /// Produces a result only when the case has user messages and the
    /// invocation has a non-blank final response.
    LanguageDetectionEvaluator,
    "language_detection",
    "language_detection_v0",
    |eval_case, run| {
        let prompted = has_user_prompt(eval_case);
        prompted && has_final_response(run)
    }
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `perceived_error`, using the
    /// `perceived_error_v0` template literal.
    ///
    /// Produces a result whenever the invocation has a non-blank final
    /// response; the case itself is not consulted by the criterion.
    PerceivedErrorEvaluator,
    "perceived_error",
    "perceived_error_v0",
    |_, run| has_final_response(run)
}
agent_evaluator! {
    /// Judge-backed evaluator reported as `interactions`, using the
    /// `interactions_v0` template literal.
    ///
    /// Produces a result only when the case sets `expected_interactions`, has
    /// user messages, and the invocation has a non-blank final response.
    InteractionsEvaluator,
    "interactions",
    "interactions_v0",
    |eval_case, run| {
        let has_reference = eval_case.expected_interactions.is_some();
        has_reference && has_user_prompt(eval_case) && has_final_response(run)
    }
}