#![forbid(unsafe_code)]
#![cfg(feature = "evaluator-rag")]
use std::sync::Arc;
use crate::evaluator::Evaluator;
use crate::score::Score;
use crate::types::{EvalCase, EvalMetricResult, Invocation};
use super::{JudgeEvaluatorConfig, build_prompt_context, evaluate_with_builtin};
/// Reports whether the invocation carries a final response with visible content.
///
/// Returns `false` when `final_response` is `None` or consists only of
/// whitespace.
fn has_final_response(invocation: &Invocation) -> bool {
    matches!(
        invocation.final_response.as_deref(),
        Some(text) if !text.trim().is_empty()
    )
}
/// Reports whether the case contains at least one user message.
fn has_user_prompt(case: &EvalCase) -> bool {
    let messages = &case.user_messages;
    !messages.is_empty()
}
/// Reports whether the case has retrieved context available.
///
/// The check is that `few_shot_examples` is non-empty.
/// NOTE(review): presumably that field doubles as the carrier for retrieved
/// passages — confirm against the `EvalCase` definition.
fn has_retrieved_context(case: &EvalCase) -> bool {
    let examples = &case.few_shot_examples;
    !examples.is_empty()
}
/// Generates a judge-backed RAG evaluator type.
///
/// Arguments, in order:
/// 1. optional attributes (doc comments included) forwarded onto the struct,
/// 2. the struct name,
/// 3. the evaluator name literal returned by `Evaluator::name`,
/// 4. the built-in template literal handed to `evaluate_with_builtin`,
/// 5. a non-capturing closure `(&EvalCase, &Invocation) -> bool` deciding
///    whether the evaluator applies to a case/invocation pair.
///
/// The generated type wraps a `JudgeEvaluatorConfig`, exposes consuming
/// `with_*` builders that forward to the same-named config methods, and
/// implements `JudgeEvaluatorBuilder` and `Evaluator`.
macro_rules! rag_evaluator {
    (
        $(#[$meta:meta])*
        $name:ident, $eval_name:literal, $template:literal, $criterion:expr
    ) => {
        $(#[$meta])*
        pub struct $name {
            config: JudgeEvaluatorConfig,
        }

        impl $name {
            /// Creates the evaluator from a judge configuration.
            #[must_use]
            pub const fn new(config: JudgeEvaluatorConfig) -> Self {
                Self { config }
            }

            /// Forwards `template` to `JudgeEvaluatorConfig::with_prompt` and
            /// keeps the resulting configuration.
            #[must_use]
            pub fn with_prompt(self, template: Arc<dyn crate::prompt::JudgePromptTemplate>) -> Self {
                Self {
                    config: self.config.with_prompt(template),
                }
            }

            /// Forwards `examples` to `JudgeEvaluatorConfig::with_few_shot` and
            /// keeps the resulting configuration.
            #[must_use]
            pub fn with_few_shot(self, examples: Vec<crate::types::FewShotExample>) -> Self {
                Self {
                    config: self.config.with_few_shot(examples),
                }
            }

            /// Forwards `prompt` to `JudgeEvaluatorConfig::with_system_prompt`
            /// and keeps the resulting configuration.
            #[must_use]
            pub fn with_system_prompt(self, prompt: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_system_prompt(prompt),
                }
            }

            /// Forwards `schema` to `JudgeEvaluatorConfig::with_output_schema`
            /// and keeps the resulting configuration.
            #[must_use]
            pub fn with_output_schema(self, schema: serde_json::Value) -> Self {
                Self {
                    config: self.config.with_output_schema(schema),
                }
            }

            /// Forwards `flag` to `JudgeEvaluatorConfig::with_use_reasoning`
            /// and keeps the resulting configuration.
            #[must_use]
            pub fn with_use_reasoning(self, flag: bool) -> Self {
                Self {
                    config: self.config.with_use_reasoning(flag),
                }
            }

            /// Forwards `key` to `JudgeEvaluatorConfig::with_feedback_key` and
            /// keeps the resulting configuration.
            #[must_use]
            pub fn with_feedback_key(self, key: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_feedback_key(key),
                }
            }

            /// Borrows the judge configuration held by this evaluator.
            #[must_use]
            pub const fn config(&self) -> &JudgeEvaluatorConfig {
                &self.config
            }
        }

        impl $crate::evaluators::JudgeEvaluatorBuilder for $name {
            fn judge_config_mut(&mut self) -> &mut JudgeEvaluatorConfig {
                &mut self.config
            }
        }

        impl Evaluator for $name {
            fn name(&self) -> &'static str {
                $eval_name
            }

            fn evaluate(
                &self,
                case: &EvalCase,
                invocation: &Invocation,
            ) -> Option<EvalMetricResult> {
                // Coercing to a fn pointer gives the closure's untyped
                // parameters their types and rejects capturing closures.
                let applies: fn(&EvalCase, &Invocation) -> bool = $criterion;
                // Inapplicable cases yield `None` rather than a failing score.
                applies(case, invocation).then(|| {
                    let context = build_prompt_context(&self.config, case, invocation);
                    evaluate_with_builtin($eval_name, $template, &self.config, &context)
                })
            }
        }
    };
}
rag_evaluator! {
    /// Judge-based evaluator reported as `rag_groundedness`, using the
    /// built-in `rag_groundedness_v0` template.
    ///
    /// It applies only when the case has retrieved context and a user prompt
    /// and the invocation has a non-blank final response; otherwise
    /// `evaluate` returns `None`.
    ///
    /// NOTE(review): presumably judges whether the response is supported by
    /// the retrieved context — the rubric lives in the template; confirm there.
    RAGGroundednessEvaluator,
    "rag_groundedness",
    "rag_groundedness_v0",
    |case, invocation| {
        has_retrieved_context(case) && has_user_prompt(case) && has_final_response(invocation)
    }
}
rag_evaluator! {
    /// Judge-based evaluator reported as `rag_retrieval_relevance`, using the
    /// built-in `rag_retrieval_relevance_v0` template.
    ///
    /// It applies when the case has retrieved context and a user prompt; the
    /// invocation is not consulted by the applicability check. Otherwise
    /// `evaluate` returns `None`.
    ///
    /// NOTE(review): presumably judges how relevant the retrieved context is
    /// to the user prompt — the rubric lives in the template; confirm there.
    RAGRetrievalRelevanceEvaluator,
    "rag_retrieval_relevance",
    "rag_retrieval_relevance_v0",
    |case, _invocation| {
        has_retrieved_context(case) && has_user_prompt(case)
    }
}
rag_evaluator! {
    /// Judge-based evaluator reported as `rag_helpfulness`, using the
    /// built-in `rag_helpfulness_v0` template.
    ///
    /// It applies only when the case has retrieved context and a user prompt
    /// and the invocation has a non-blank final response; otherwise
    /// `evaluate` returns `None`.
    ///
    /// NOTE(review): presumably judges how helpful the response is to the
    /// user — the rubric lives in the template; confirm there.
    RAGHelpfulnessEvaluator,
    "rag_helpfulness",
    "rag_helpfulness_v0",
    |case, invocation| {
        has_retrieved_context(case) && has_user_prompt(case) && has_final_response(invocation)
    }
}
/// Errors reported while producing or comparing embeddings.
///
/// The display text of each variant is defined by its `#[error(...)]`
/// attribute; `EmbeddingSimilarityEvaluator` copies that text into the
/// `details` of a failing result.
#[derive(Debug, thiserror::Error)]
pub enum EmbedderError {
/// The input was rejected by the embedder.
/// NOTE(review): exact rejection conditions are up to each `Embedder`
/// implementation — confirm with the implementations.
#[error("invalid input: {reason}")]
InvalidInput {
/// Human-readable explanation, interpolated into the display message.
reason: String,
},
/// The embedding backend failed.
/// NOTE(review): presumably covers transport/model failures — confirm with
/// the implementations.
#[error("embedder backend error: {reason}")]
Backend {
/// Human-readable explanation, interpolated into the display message.
reason: String,
},
/// The response and reference embeddings have different lengths, so they
/// cannot be compared.
#[error("dimension mismatch: response={response_dim} reference={reference_dim}")]
DimensionMismatch {
/// Length of the embedding produced for the response text.
response_dim: usize,
/// Length of the embedding produced for the reference text.
reference_dim: usize,
},
}
/// Turns text into an embedding vector.
///
/// Implementations must be `Send + Sync`; `EmbeddingSimilarityEvaluator`
/// stores one as an `Arc<dyn Embedder>`.
pub trait Embedder: Send + Sync {
/// Embeds `text` and returns its vector.
///
/// # Errors
///
/// Returns an [`EmbedderError`] when the text cannot be embedded.
fn embed(&self, text: &str) -> Result<Vec<f32>, EmbedderError>;
}
/// Computes the cosine similarity of two embedding vectors.
///
/// Components are widened to `f64` before accumulation. The result is always
/// a finite value in `[-1.0, 1.0]`.
///
/// Degenerate inputs yield `0.0` instead of an error or NaN:
/// - the slices differ in length or are empty,
/// - either vector has zero norm,
/// - the quotient is not finite, which happens when a component is NaN or
///   infinite.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f64 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let mut dot: f64 = 0.0;
    let mut norm_a_sq: f64 = 0.0;
    let mut norm_b_sq: f64 = 0.0;
    for (x, y) in a.iter().zip(b) {
        let xf = f64::from(*x);
        let yf = f64::from(*y);
        dot += xf * yf;
        norm_a_sq += xf * xf;
        norm_b_sq += yf * yf;
    }

    if norm_a_sq == 0.0 || norm_b_sq == 0.0 {
        return 0.0;
    }

    let sim = dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt());
    // `f64::clamp` propagates NaN, so non-finite quotients must be filtered
    // out explicitly to keep the documented range.
    if sim.is_finite() {
        sim.clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
/// Threshold that `EmbeddingSimilarityEvaluator::new` installs by default.
///
/// It is passed to `Score::new` alongside the remapped similarity (the
/// midpoint of the raw cosine and `1.0`, clamped to `[0.0, 1.0]`), so it is
/// on the remapped scale rather than the raw cosine scale.
pub const DEFAULT_EMBEDDING_SIMILARITY_THRESHOLD: f64 = 0.8;
/// Evaluator that scores an invocation's final response by the cosine
/// similarity between its embedding and the embedding of a fixed reference
/// text.
pub struct EmbeddingSimilarityEvaluator {
/// Name returned by `Evaluator::name` and written into each result.
name: &'static str,
/// Reference text; it is embedded on every `evaluate` call.
reference: String,
/// Threshold handed to `Score::new` together with the remapped similarity.
threshold: f64,
/// Backend used to embed both the response and the reference.
embedder: Arc<dyn Embedder>,
}
impl EmbeddingSimilarityEvaluator {
    /// Builds an evaluator that compares responses against `reference` using
    /// `embedder`.
    ///
    /// The evaluator starts out named `"embedding_similarity"` with
    /// [`DEFAULT_EMBEDDING_SIMILARITY_THRESHOLD`] as its threshold.
    #[must_use]
    pub fn new(reference: impl Into<String>, embedder: Arc<dyn Embedder>) -> Self {
        let reference = reference.into();
        Self {
            embedder,
            reference,
            threshold: DEFAULT_EMBEDDING_SIMILARITY_THRESHOLD,
            name: "embedding_similarity",
        }
    }

    /// Replaces the name reported by this evaluator.
    #[must_use]
    pub const fn with_name(mut self, name: &'static str) -> Self {
        // Plain field assignment keeps this usable as a `const fn`.
        self.name = name;
        self
    }

    /// Replaces the threshold passed to `Score::new`.
    #[must_use]
    pub const fn with_threshold(mut self, threshold: f64) -> Self {
        // Plain field assignment keeps this usable as a `const fn`.
        self.threshold = threshold;
        self
    }

    /// Returns the reference text responses are compared against.
    #[must_use]
    pub fn reference(&self) -> &str {
        self.reference.as_str()
    }

    /// Returns the configured threshold.
    #[must_use]
    pub const fn threshold(&self) -> f64 {
        self.threshold
    }
}
impl Evaluator for EmbeddingSimilarityEvaluator {
    /// Returns the configured evaluator name.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Scores the invocation's final response against the reference text.
    ///
    /// Returns `None` when there is no final response or it is blank.
    /// Otherwise both texts are embedded (response first, then reference):
    /// - an embedding error, or embeddings of different lengths, produce a
    ///   `Score::fail()` result whose `details` carry the error text;
    /// - on success the raw cosine similarity is remapped to the midpoint of
    ///   itself and `1.0`, clamped to `[0.0, 1.0]`, and passed to
    ///   `Score::new` with the configured threshold.
    fn evaluate(&self, _case: &EvalCase, invocation: &Invocation) -> Option<EvalMetricResult> {
        let response = invocation
            .final_response
            .as_deref()
            .filter(|text| !text.trim().is_empty())?;

        let evaluator_name = self.name.to_string();
        // Every failure path reports the same shape: a failing score plus an
        // explanation in `details`.
        let failed = |evaluator_name: String, details: String| EvalMetricResult {
            evaluator_name,
            score: Score::fail(),
            details: Some(details),
        };

        let response_vec = match self.embedder.embed(response) {
            Ok(vector) => vector,
            Err(err) => return Some(failed(evaluator_name, format!("embed_response: {err}"))),
        };
        let reference_vec = match self.embedder.embed(&self.reference) {
            Ok(vector) => vector,
            Err(err) => return Some(failed(evaluator_name, format!("embed_reference: {err}"))),
        };

        let response_dim = response_vec.len();
        let reference_dim = reference_vec.len();
        if response_dim != reference_dim {
            let mismatch = EmbedderError::DimensionMismatch {
                response_dim,
                reference_dim,
            };
            return Some(failed(evaluator_name, mismatch.to_string()));
        }

        let raw = cosine_similarity(&response_vec, &reference_vec);
        // Shift the cosine range [-1, 1] onto [0, 1] before scoring.
        let remapped = f64::midpoint(raw, 1.0).clamp(0.0, 1.0);
        let threshold = self.threshold;
        Some(EvalMetricResult {
            evaluator_name,
            score: Score::new(remapped, threshold),
            details: Some(format!(
                "cosine_similarity={raw:.4} remapped={remapped:.4} threshold={:.4}",
                threshold
            )),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing a similarity with its expected value.
    const EPSILON: f64 = 1e-9;

    /// Asserts that `cosine_similarity(a, b)` lies within `EPSILON` of `expected`.
    fn assert_similarity(a: &[f32], b: &[f32], expected: f64) {
        let actual = cosine_similarity(a, b);
        assert!((actual - expected).abs() < EPSILON);
    }

    #[test]
    fn cosine_similarity_of_identical_vectors_is_one() {
        let v = [1.0_f32, 0.0, 0.0];
        assert_similarity(&v, &v, 1.0);
    }

    #[test]
    fn cosine_similarity_of_opposite_vectors_is_minus_one() {
        assert_similarity(&[1.0_f32, 0.0], &[-1.0_f32, 0.0], -1.0);
    }

    #[test]
    fn cosine_similarity_orthogonal_vectors_is_zero() {
        assert_similarity(&[1.0_f32, 0.0], &[0.0_f32, 1.0], 0.0);
    }

    #[test]
    fn cosine_similarity_mismatched_dims_is_zero() {
        assert_similarity(&[1.0_f32, 0.0], &[1.0_f32], 0.0);
    }

    #[test]
    fn cosine_similarity_empty_vectors_is_zero() {
        let empty: [f32; 0] = [];
        assert_similarity(&empty, &empty, 0.0);
    }
}