use super::{
EvaluationData, EvaluationMetadata, EvaluationResult, EvaluationSummary, Evaluator,
EvaluatorConfig, EvaluatorPerformance, PerformanceStats, QueryEvaluationResult,
};
use crate::RragResult;
use std::collections::HashMap;
/// Evaluator that runs a configurable set of RAGAS-style metrics over evaluation data.
///
/// Constructed with `RagasEvaluator::new`; the scoring entry point is the
/// `Evaluator` trait implementation further down in this file.
pub struct RagasEvaluator {
/// Configuration supplied at construction time.
config: RagasConfig,
/// Metric implementations instantiated from `config.enabled_metrics`.
metrics: Vec<Box<dyn RagasMetric>>,
}
/// Top-level configuration for `RagasEvaluator`.
///
/// Holds the list of metrics to run plus one sub-configuration per metric.
/// A sub-configuration is only handed to its metric when the matching
/// `RagasMetricType` appears in `enabled_metrics`.
#[derive(Debug, Clone)]
pub struct RagasConfig {
/// Metrics the evaluator should instantiate, in evaluation order.
pub enabled_metrics: Vec<RagasMetricType>,
/// Settings passed to the faithfulness metric.
pub faithfulness_config: FaithfulnessConfig,
/// Settings passed to the answer-relevancy metric.
pub answer_relevancy_config: AnswerRelevancyConfig,
/// Settings passed to the context-precision metric.
pub context_precision_config: ContextPrecisionConfig,
/// Settings passed to the context-recall metric.
pub context_recall_config: ContextRecallConfig,
/// Settings passed to the context-relevancy metric.
pub context_relevancy_config: ContextRelevancyConfig,
/// Settings passed to the answer-similarity metric.
pub answer_similarity_config: AnswerSimilarityConfig,
/// Settings passed to the answer-correctness metric.
pub answer_correctness_config: AnswerCorrectnessConfig,
}
impl Default for RagasConfig {
fn default() -> Self {
Self {
enabled_metrics: vec![
RagasMetricType::Faithfulness,
RagasMetricType::AnswerRelevancy,
RagasMetricType::ContextPrecision,
RagasMetricType::ContextRecall,
RagasMetricType::ContextRelevancy,
],
faithfulness_config: FaithfulnessConfig::default(),
answer_relevancy_config: AnswerRelevancyConfig::default(),
context_precision_config: ContextPrecisionConfig::default(),
context_recall_config: ContextRecallConfig::default(),
context_relevancy_config: ContextRelevancyConfig::default(),
answer_similarity_config: AnswerSimilarityConfig::default(),
answer_correctness_config: AnswerCorrectnessConfig::default(),
}
}
}
/// Identifies a RAGAS metric.
///
/// `RagasEvaluator::initialize_metrics` in this file only builds
/// implementations for the first seven variants (`Faithfulness` through
/// `AnswerCorrectness`); the remaining variants are skipped when listed in
/// `RagasConfig::enabled_metrics`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RagasMetricType {
/// Implemented by `FaithfulnessMetric`.
Faithfulness,
/// Implemented by `AnswerRelevancyMetric`.
AnswerRelevancy,
/// Implemented by `ContextPrecisionMetric`.
ContextPrecision,
/// Implemented by `ContextRecallMetric`.
ContextRecall,
/// Implemented by `ContextRelevancyMetric`.
ContextRelevancy,
/// Implemented by `AnswerSimilarityMetric`.
AnswerSimilarity,
/// Implemented by `AnswerCorrectnessMetric`.
AnswerCorrectness,
/// No implementation in this file; skipped by the evaluator.
Harmfulness,
/// No implementation in this file; skipped by the evaluator.
Maliciousness,
/// No implementation in this file; skipped by the evaluator.
Coherence,
/// No implementation in this file; skipped by the evaluator.
Conciseness,
}
/// A single RAGAS metric that can score one query or a batch of queries.
pub trait RagasMetric: Send + Sync {
    /// Name under which this metric's scores are reported.
    fn name(&self) -> &str;

    /// The `RagasMetricType` this implementation corresponds to.
    fn metric_type(&self) -> RagasMetricType;

    /// Scores a single query.
    ///
    /// * `query` - the question text.
    /// * `contexts` - contents of the retrieved documents.
    /// * `answer` - the generated answer.
    /// * `ground_truth` - reference answer, when one is available.
    fn evaluate_query(
        &self,
        query: &str,
        contexts: &[String],
        answer: &str,
        ground_truth: Option<&str>,
    ) -> RragResult<f32>;

    /// Scores several queries by calling [`RagasMetric::evaluate_query`] once per entry.
    ///
    /// The slices are matched by position and `queries` determines how many
    /// scores are produced. A position missing from `contexts` is treated as
    /// "no contexts", from `answers` as an empty answer, and from
    /// `ground_truths` as no ground truth.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error produced by `evaluate_query`.
    fn evaluate_batch(
        &self,
        queries: &[String],
        contexts: &[Vec<String>],
        answers: &[String],
        ground_truths: &[Option<String>],
    ) -> RragResult<Vec<f32>> {
        queries
            .iter()
            .enumerate()
            .map(|(idx, query)| {
                let query_contexts: &[String] = match contexts.get(idx) {
                    Some(docs) => docs.as_slice(),
                    None => &[],
                };
                let answer: &str = match answers.get(idx) {
                    Some(text) => text.as_str(),
                    None => "",
                };
                let ground_truth = ground_truths.get(idx).and_then(|gt| gt.as_deref());
                self.evaluate_query(query, query_contexts, answer, ground_truth)
            })
            // Collecting into `Result` short-circuits on the first `Err`.
            .collect()
    }

    /// Static description of the metric's requirements and score semantics.
    fn get_config(&self) -> RagasMetricConfig;
}
/// Static description of a metric, as returned by `RagasMetric::get_config`.
#[derive(Debug, Clone)]
pub struct RagasMetricConfig {
/// Metric name.
pub name: String,
/// Whether the metric declares that it needs a ground-truth answer.
pub requires_ground_truth: bool,
/// Whether the metric declares that it needs retrieved contexts.
pub requires_context: bool,
/// Score bounds; every implementation in this file reports `(0.0, 1.0)`.
pub score_range: (f32, f32),
/// Whether a larger score means a better result.
pub higher_is_better: bool,
}
/// Settings for the faithfulness metric.
///
/// NOTE(review): the `FaithfulnessMetric` scoring code visible in this file
/// stores this configuration but never reads any of its fields.
#[derive(Debug, Clone)]
pub struct FaithfulnessConfig {
    /// Flag for NLI-model based checking (per its name); defaults to `false`.
    pub use_nli_model: bool,
    /// Batch size setting; defaults to `10`.
    pub batch_size: usize,
    /// Similarity threshold setting; defaults to `0.7`.
    pub similarity_threshold: f32,
}

impl Default for FaithfulnessConfig {
    /// Returns `use_nli_model = false`, `batch_size = 10`, `similarity_threshold = 0.7`.
    fn default() -> Self {
        FaithfulnessConfig {
            similarity_threshold: 0.7,
            batch_size: 10,
            use_nli_model: false,
        }
    }
}
/// Settings for the answer-relevancy metric.
///
/// NOTE(review): the `AnswerRelevancyMetric` scoring code visible in this
/// file stores this configuration but never reads any of its fields.
#[derive(Debug, Clone)]
pub struct AnswerRelevancyConfig {
    /// Flag for question-generation based scoring (per its name); defaults to `false`.
    pub use_question_generation: bool,
    /// Number of questions to generate; defaults to `3`.
    pub num_generated_questions: usize,
    /// Similarity threshold setting; defaults to `0.7`.
    pub similarity_threshold: f32,
}

impl Default for AnswerRelevancyConfig {
    /// Returns `use_question_generation = false`, `num_generated_questions = 3`,
    /// `similarity_threshold = 0.7`.
    fn default() -> Self {
        AnswerRelevancyConfig {
            similarity_threshold: 0.7,
            num_generated_questions: 3,
            use_question_generation: false,
        }
    }
}
/// Settings for the context-precision metric.
#[derive(Debug, Clone)]
pub struct ContextPrecisionConfig {
    /// Binary-relevance flag; defaults to `true`.
    ///
    /// NOTE(review): not read by `ContextPrecisionMetric` in this file.
    pub use_binary_relevance: bool,
    /// Minimum fraction of query words a context must contain to count as
    /// relevant; defaults to `0.5`.
    pub relevance_threshold: f32,
}

impl Default for ContextPrecisionConfig {
    /// Returns `use_binary_relevance = true`, `relevance_threshold = 0.5`.
    fn default() -> Self {
        ContextPrecisionConfig {
            relevance_threshold: 0.5,
            use_binary_relevance: true,
        }
    }
}
/// Settings for the context-recall metric.
#[derive(Debug, Clone)]
pub struct ContextRecallConfig {
    /// Minimum fraction of a ground-truth sentence's words that must appear in
    /// the contexts for the sentence to count as recalled; defaults to `0.7`.
    pub sentence_similarity_threshold: f32,
    /// Semantic-similarity flag; defaults to `true`.
    ///
    /// NOTE(review): not read by `ContextRecallMetric` in this file.
    pub use_semantic_similarity: bool,
}

impl Default for ContextRecallConfig {
    /// Returns `sentence_similarity_threshold = 0.7`, `use_semantic_similarity = true`.
    fn default() -> Self {
        ContextRecallConfig {
            use_semantic_similarity: true,
            sentence_similarity_threshold: 0.7,
        }
    }
}
/// Settings for the context-relevancy metric.
#[derive(Debug, Clone)]
pub struct ContextRelevancyConfig {
    /// Relevance threshold setting; defaults to `0.7`.
    ///
    /// NOTE(review): not read by `ContextRelevancyMetric` in this file.
    pub relevance_threshold: f32,
}

impl Default for ContextRelevancyConfig {
    /// Returns `relevance_threshold = 0.7`.
    fn default() -> Self {
        let relevance_threshold = 0.7;
        ContextRelevancyConfig {
            relevance_threshold,
        }
    }
}
/// Settings for the answer-similarity metric.
#[derive(Debug, Clone)]
pub struct AnswerSimilarityConfig {
    /// Similarity algorithm selector; defaults to `SimilarityMethod::Cosine`.
    pub similarity_method: SimilarityMethod,
    /// Factual weight; defaults to `0.7`.
    ///
    /// NOTE(review): not read by `AnswerSimilarityMetric` in this file.
    pub weight_factual: f32,
    /// Semantic weight; defaults to `0.3`.
    ///
    /// NOTE(review): not read by `AnswerSimilarityMetric` in this file.
    pub weight_semantic: f32,
}

impl Default for AnswerSimilarityConfig {
    /// Returns `similarity_method = Cosine`, `weight_factual = 0.7`,
    /// `weight_semantic = 0.3`.
    fn default() -> Self {
        AnswerSimilarityConfig {
            weight_semantic: 0.3,
            weight_factual: 0.7,
            similarity_method: SimilarityMethod::Cosine,
        }
    }
}
/// Settings for the answer-correctness metric.
#[derive(Debug, Clone)]
pub struct AnswerCorrectnessConfig {
    /// Fact-checking flag; defaults to `false`.
    ///
    /// NOTE(review): not read by `AnswerCorrectnessMetric` in this file.
    pub use_fact_checking: bool,
    /// Weight applied to the factual component of the score; defaults to `0.75`.
    pub factual_weight: f32,
    /// Weight applied to the semantic component of the score; defaults to `0.25`.
    pub semantic_weight: f32,
}

impl Default for AnswerCorrectnessConfig {
    /// Returns `use_fact_checking = false`, `factual_weight = 0.75`,
    /// `semantic_weight = 0.25`.
    fn default() -> Self {
        AnswerCorrectnessConfig {
            semantic_weight: 0.25,
            factual_weight: 0.75,
            use_fact_checking: false,
        }
    }
}
/// Selects the similarity algorithm for `AnswerSimilarityConfig`.
///
/// NOTE(review): `AnswerSimilarityMetric` in this file computes the same
/// word-set overlap (intersection over union) for every variant, so the
/// selection currently has no effect on the score.
#[derive(Debug, Clone)]
pub enum SimilarityMethod {
/// Cosine similarity (default in `AnswerSimilarityConfig`).
Cosine,
/// Jaccard similarity.
Jaccard,
/// BLEU.
Bleu,
/// ROUGE.
Rouge,
}
impl RagasEvaluator {
    /// Creates an evaluator and instantiates every supported metric listed in
    /// `config.enabled_metrics`.
    pub fn new(config: RagasConfig) -> Self {
        // `config` is already owned, so it is moved in rather than cloned.
        let mut evaluator = Self {
            config,
            metrics: Vec::new(),
        };
        evaluator.initialize_metrics();
        evaluator
    }

    /// Appends one metric implementation per supported entry of
    /// `self.config.enabled_metrics`, each built from a clone of its
    /// sub-configuration.
    ///
    /// Metric types without an implementation in this file are skipped
    /// silently. The match is exhaustive on purpose: adding a variant to
    /// `RagasMetricType` forces a decision here instead of being swallowed by
    /// a wildcard arm.
    fn initialize_metrics(&mut self) {
        for metric_type in &self.config.enabled_metrics {
            let metric: Box<dyn RagasMetric> = match metric_type {
                RagasMetricType::Faithfulness => Box::new(FaithfulnessMetric::new(
                    self.config.faithfulness_config.clone(),
                )),
                RagasMetricType::AnswerRelevancy => Box::new(AnswerRelevancyMetric::new(
                    self.config.answer_relevancy_config.clone(),
                )),
                RagasMetricType::ContextPrecision => Box::new(ContextPrecisionMetric::new(
                    self.config.context_precision_config.clone(),
                )),
                RagasMetricType::ContextRecall => Box::new(ContextRecallMetric::new(
                    self.config.context_recall_config.clone(),
                )),
                RagasMetricType::ContextRelevancy => Box::new(ContextRelevancyMetric::new(
                    self.config.context_relevancy_config.clone(),
                )),
                RagasMetricType::AnswerSimilarity => Box::new(AnswerSimilarityMetric::new(
                    self.config.answer_similarity_config.clone(),
                )),
                RagasMetricType::AnswerCorrectness => Box::new(AnswerCorrectnessMetric::new(
                    self.config.answer_correctness_config.clone(),
                )),
                // Not implemented in this file.
                RagasMetricType::Harmfulness
                | RagasMetricType::Maliciousness
                | RagasMetricType::Coherence
                | RagasMetricType::Conciseness => continue,
            };
            self.metrics.push(metric);
        }
    }
}
impl Evaluator for RagasEvaluator {
    /// Returns the fixed evaluator name, `"RAGAS"`.
    fn name(&self) -> &str {
        "RAGAS"
    }

    /// Runs every configured metric over every query in `data`.
    ///
    /// For each query, the first system response and first ground-truth entry
    /// with a matching `query_id` are used. Queries without a system response
    /// still produce a `QueryEvaluationResult`, with an empty score map.
    /// A metric that returns an error for a query is reported on stderr and
    /// contributes no score for that query; the evaluation continues.
    ///
    /// The summary carries the per-metric mean and population standard
    /// deviation of the collected scores, plus timing statistics. Timing
    /// statistics are `0.0` where they would otherwise divide by zero (no
    /// queries, or no measurable elapsed time).
    fn evaluate(&self, data: &EvaluationData) -> RragResult<EvaluationResult> {
        let start_time = std::time::Instant::now();
        let mut per_query_results = Vec::with_capacity(data.queries.len());
        let mut all_metric_scores: HashMap<String, Vec<f32>> = HashMap::new();

        for query in &data.queries {
            let mut query_scores = HashMap::new();
            let system_response = data
                .system_responses
                .iter()
                .find(|r| r.query_id == query.id);
            let ground_truth = data.ground_truth.iter().find(|gt| gt.query_id == query.id);

            if let Some(response) = system_response {
                let contexts: Vec<String> = response
                    .retrieved_docs
                    .iter()
                    .map(|doc| doc.content.clone())
                    .collect();
                // A missing generated answer is scored as an empty answer.
                let answer = response.generated_answer.as_deref().unwrap_or("");
                let ground_truth_answer =
                    ground_truth.and_then(|gt| gt.expected_answer.as_deref());

                for metric in &self.metrics {
                    match metric.evaluate_query(
                        &query.query,
                        &contexts,
                        answer,
                        ground_truth_answer,
                    ) {
                        Ok(score) => {
                            let metric_name = metric.name().to_string();
                            query_scores.insert(metric_name.clone(), score);
                            all_metric_scores
                                .entry(metric_name)
                                .or_default()
                                .push(score);
                        }
                        Err(e) => {
                            // Best effort: one failing metric must not abort the run.
                            eprintln!(
                                "Warning: Failed to evaluate {} for query {}: {}",
                                metric.name(),
                                query.id,
                                e
                            );
                        }
                    }
                }
            }

            per_query_results.push(QueryEvaluationResult {
                query_id: query.id.clone(),
                scores: query_scores,
                errors: Vec::new(),
                details: HashMap::new(),
            });
        }

        // Mean and population standard deviation per metric, in a single pass
        // over the collected scores.
        let mut overall_scores = HashMap::new();
        let mut std_deviations = HashMap::new();
        for (metric_name, scores) in &all_metric_scores {
            if scores.is_empty() {
                continue;
            }
            let count = scores.len() as f32;
            let mean = scores.iter().sum::<f32>() / count;
            let variance = scores
                .iter()
                .map(|score| (score - mean).powi(2))
                .sum::<f32>()
                / count;
            overall_scores.insert(metric_name.clone(), mean);
            std_deviations.insert(metric_name.clone(), variance.sqrt());
        }
        // The summary reports the same per-metric means as `overall_scores`.
        let avg_scores = overall_scores.clone();

        // Fractional milliseconds: whole-millisecond truncation reports 0 ms
        // for fast runs, which made the throughput below infinite.
        let total_time = start_time.elapsed().as_secs_f32() * 1000.0;
        let query_count = data.queries.len();
        let avg_eval_time_ms = if query_count == 0 {
            0.0
        } else {
            total_time / query_count as f32
        };
        let throughput_qps = if total_time > 0.0 {
            query_count as f32 / (total_time / 1000.0)
        } else {
            0.0
        };

        let insights = self.generate_insights(&overall_scores, &std_deviations);
        let recommendations = self.generate_recommendations(&overall_scores);

        Ok(EvaluationResult {
            id: uuid::Uuid::new_v4().to_string(),
            evaluation_type: "RAGAS".to_string(),
            overall_scores,
            per_query_results,
            summary: EvaluationSummary {
                total_queries: query_count,
                avg_scores,
                std_deviations,
                performance_stats: PerformanceStats {
                    avg_eval_time_ms,
                    total_eval_time_ms: total_time,
                    // Hard-coded value; memory usage is not measured here.
                    peak_memory_usage_mb: 50.0,
                    throughput_qps,
                },
                insights,
                recommendations,
            },
            metadata: EvaluationMetadata {
                timestamp: chrono::Utc::now(),
                evaluation_version: "1.0.0".to_string(),
                system_config: HashMap::new(),
                // `std::env::vars()` panics when any variable is not valid
                // Unicode; iterate the OS strings and skip such entries.
                // NOTE(review): this records the entire process environment,
                // which may include secrets - consider an allow-list.
                environment: std::env::vars_os()
                    .filter_map(|(key, value)| {
                        Some((key.into_string().ok()?, value.into_string().ok()?))
                    })
                    .collect(),
                git_commit: None,
            },
        })
    }

    /// Names of the metrics this evaluator instance was configured with.
    fn supported_metrics(&self) -> Vec<String> {
        self.metrics.iter().map(|m| m.name().to_string()).collect()
    }

    /// Describes this evaluator.
    ///
    /// The performance figures are hard-coded constants, not measurements.
    fn get_config(&self) -> EvaluatorConfig {
        EvaluatorConfig {
            name: "RAGAS".to_string(),
            version: "1.0.0".to_string(),
            metrics: self.supported_metrics(),
            performance: EvaluatorPerformance {
                avg_time_per_sample_ms: 100.0,
                memory_usage_mb: 50.0,
                accuracy: 0.9,
            },
        }
    }
}
impl RagasEvaluator {
    /// Turns aggregate scores into human-readable observations.
    ///
    /// * `scores` - mean score per metric name.
    /// * `std_devs` - standard deviation per metric name.
    ///
    /// Emits an overall rating based on the mean of all metric scores (only
    /// when at least one score exists), warnings for low `faithfulness`
    /// (< 0.7), `context_precision` (< 0.6) and `context_recall` (< 0.6), and
    /// one line listing every metric whose standard deviation exceeds 0.2.
    fn generate_insights(
        &self,
        scores: &HashMap<String, f32>,
        std_devs: &HashMap<String, f32>,
    ) -> Vec<String> {
        let mut insights = Vec::new();

        // With no scores the mean is 0/0 = NaN, which fails both comparisons
        // and would be reported as "needs significant improvement".
        if !scores.is_empty() {
            let avg_score = scores.values().sum::<f32>() / scores.len() as f32;
            let overall = if avg_score > 0.8 {
                "🟢 Overall RAGAS performance is excellent"
            } else if avg_score > 0.6 {
                "🟡 Overall RAGAS performance is good with room for improvement"
            } else {
                "🔴 Overall RAGAS performance needs significant improvement"
            };
            insights.push(overall.to_string());
        }

        // True when the metric has a score and that score is below `threshold`.
        let below = |metric: &str, threshold: f32| {
            scores
                .get(metric)
                .map_or(false, |&score| score < threshold)
        };

        if below("faithfulness", 0.7) {
            insights.push(
                "⚠️ Low faithfulness score indicates potential hallucination issues".to_string(),
            );
        }
        if below("context_precision", 0.6) {
            insights.push(
                "🎯 Low context precision suggests retrieval is returning irrelevant documents"
                    .to_string(),
            );
        }
        if below("context_recall", 0.6) {
            insights.push(
                "📚 Low context recall indicates important information may be missing from retrieval"
                    .to_string(),
            );
        }

        let mut high_variance_metrics: Vec<&str> = std_devs
            .iter()
            .filter(|(_, std_dev)| **std_dev > 0.2)
            .map(|(name, _)| name.as_str())
            .collect();
        // HashMap iteration order is arbitrary; sort so the message is stable.
        high_variance_metrics.sort_unstable();
        if !high_variance_metrics.is_empty() {
            insights.push(format!(
                "📊 High variance detected in: {}. This indicates inconsistent performance across queries",
                high_variance_metrics.join(", ")
            ));
        }

        insights
    }

    /// Produces improvement suggestions for metrics that score below their threshold.
    ///
    /// Two suggestions are emitted per low metric, in the fixed order
    /// `faithfulness` (< 0.7), `context_precision` (< 0.6), `context_recall`
    /// (< 0.6), `answer_relevancy` (< 0.6). Metrics absent from `scores` are
    /// ignored.
    fn generate_recommendations(&self, scores: &HashMap<String, f32>) -> Vec<String> {
        // (metric name, threshold, suggestions emitted when the score is below it)
        const RULES: &[(&str, f32, [&str; 2])] = &[
            (
                "faithfulness",
                0.7,
                [
                    "📖 Implement stronger grounding mechanisms to improve faithfulness",
                    "🔍 Consider post-processing to filter out potential hallucinations",
                ],
            ),
            (
                "context_precision",
                0.6,
                [
                    "🎯 Improve retrieval ranking to surface more relevant documents first",
                    "⚡ Consider using reranking models to improve context quality",
                ],
            ),
            (
                "context_recall",
                0.6,
                [
                    "📈 Increase the number of retrieved documents",
                    "🔧 Tune embedding models or retrieval parameters",
                ],
            ),
            (
                "answer_relevancy",
                0.6,
                [
                    "💬 Improve prompt engineering to generate more relevant answers",
                    "🧠 Consider fine-tuning the generation model on domain-specific data",
                ],
            ),
        ];

        let mut recommendations = Vec::new();
        for (metric, threshold, advice) in RULES {
            let is_low = scores
                .get(*metric)
                .map_or(false, |score| score < threshold);
            if is_low {
                recommendations.extend(advice.iter().map(|line| line.to_string()));
            }
        }
        recommendations
    }
}
/// Word-overlap approximation of faithfulness: the share of distinct answer
/// words that also occur in the retrieved contexts.
struct FaithfulnessMetric {
    /// Stored configuration; the scoring code below does not read it.
    config: FaithfulnessConfig,
}

impl FaithfulnessMetric {
    /// Wraps the given configuration.
    fn new(config: FaithfulnessConfig) -> Self {
        FaithfulnessMetric { config }
    }
}

impl RagasMetric for FaithfulnessMetric {
    fn name(&self) -> &str {
        "faithfulness"
    }

    fn metric_type(&self) -> RagasMetricType {
        RagasMetricType::Faithfulness
    }

    /// Returns `|answer words ∩ context words| / |answer words|`.
    ///
    /// Words are lowercased, whitespace-separated tokens (punctuation is not
    /// stripped). Returns `0.0` when there are no contexts or the answer
    /// contains no words. The query and ground truth are ignored.
    fn evaluate_query(
        &self,
        _query: &str,
        contexts: &[String],
        answer: &str,
        _ground_truth: Option<&str>,
    ) -> RragResult<f32> {
        if answer.is_empty() || contexts.is_empty() {
            return Ok(0.0);
        }

        let lowered_answer = answer.to_lowercase();
        let lowered_context = contexts.join(" ").to_lowercase();
        let answer_vocab: std::collections::HashSet<&str> =
            lowered_answer.split_whitespace().collect();
        let context_vocab: std::collections::HashSet<&str> =
            lowered_context.split_whitespace().collect();

        // A whitespace-only answer has no words to support.
        if answer_vocab.is_empty() {
            return Ok(0.0);
        }

        let supported = answer_vocab
            .iter()
            .filter(|word| context_vocab.contains(*word))
            .count();
        let ratio = supported as f32 / answer_vocab.len() as f32;
        Ok(ratio.min(1.0))
    }

    fn get_config(&self) -> RagasMetricConfig {
        RagasMetricConfig {
            name: "faithfulness".to_string(),
            requires_ground_truth: false,
            requires_context: true,
            score_range: (0.0, 1.0),
            higher_is_better: true,
        }
    }
}
/// Word-overlap approximation of answer relevancy: intersection over union
/// of the query's and the answer's word sets.
struct AnswerRelevancyMetric {
    /// Stored configuration; the scoring code below does not read it.
    config: AnswerRelevancyConfig,
}

impl AnswerRelevancyMetric {
    /// Wraps the given configuration.
    fn new(config: AnswerRelevancyConfig) -> Self {
        AnswerRelevancyMetric { config }
    }
}

impl RagasMetric for AnswerRelevancyMetric {
    fn name(&self) -> &str {
        "answer_relevancy"
    }

    fn metric_type(&self) -> RagasMetricType {
        RagasMetricType::AnswerRelevancy
    }

    /// Returns `|query words ∩ answer words| / |query words ∪ answer words|`.
    ///
    /// Words are lowercased, whitespace-separated tokens. Returns `0.0` when
    /// the query or answer is empty, or when neither contains a word.
    /// Contexts and ground truth are ignored.
    fn evaluate_query(
        &self,
        query: &str,
        _contexts: &[String],
        answer: &str,
        _ground_truth: Option<&str>,
    ) -> RragResult<f32> {
        if answer.is_empty() || query.is_empty() {
            return Ok(0.0);
        }

        let lowered_query = query.to_lowercase();
        let lowered_answer = answer.to_lowercase();
        let query_vocab: std::collections::HashSet<&str> =
            lowered_query.split_whitespace().collect();
        let answer_vocab: std::collections::HashSet<&str> =
            lowered_answer.split_whitespace().collect();

        let shared = query_vocab
            .iter()
            .filter(|word| answer_vocab.contains(*word))
            .count();
        // |A ∪ B| = |A| + |B| - |A ∩ B|
        let combined = query_vocab.len() + answer_vocab.len() - shared;

        if combined == 0 {
            Ok(0.0)
        } else {
            Ok(shared as f32 / combined as f32)
        }
    }

    fn get_config(&self) -> RagasMetricConfig {
        RagasMetricConfig {
            name: "answer_relevancy".to_string(),
            requires_ground_truth: false,
            requires_context: false,
            score_range: (0.0, 1.0),
            higher_is_better: true,
        }
    }
}
/// Word-overlap approximation of context precision: the share of retrieved
/// contexts that contain enough of the query's words.
struct ContextPrecisionMetric {
    /// Supplies `relevance_threshold`; other fields are not read here.
    config: ContextPrecisionConfig,
}

impl ContextPrecisionMetric {
    /// Wraps the given configuration.
    fn new(config: ContextPrecisionConfig) -> Self {
        ContextPrecisionMetric { config }
    }
}

impl RagasMetric for ContextPrecisionMetric {
    fn name(&self) -> &str {
        "context_precision"
    }

    fn metric_type(&self) -> RagasMetricType {
        RagasMetricType::ContextPrecision
    }

    /// Returns the fraction of contexts judged relevant to the query.
    ///
    /// A context is relevant when the share of distinct query words it
    /// contains is at least `config.relevance_threshold`. Words are
    /// lowercased, whitespace-separated tokens. Returns `0.0` when there are
    /// no contexts. The answer and ground truth are ignored.
    fn evaluate_query(
        &self,
        query: &str,
        contexts: &[String],
        _answer: &str,
        _ground_truth: Option<&str>,
    ) -> RragResult<f32> {
        if contexts.is_empty() {
            return Ok(0.0);
        }

        let lowered_query = query.to_lowercase();
        let query_vocab: std::collections::HashSet<&str> =
            lowered_query.split_whitespace().collect();

        let relevant = contexts
            .iter()
            .filter(|context| {
                let lowered = context.to_lowercase();
                let context_vocab: std::collections::HashSet<&str> =
                    lowered.split_whitespace().collect();
                let hits = query_vocab
                    .iter()
                    .filter(|word| context_vocab.contains(*word))
                    .count();
                // A query without words gives 0/0 = NaN, which fails the
                // comparison, so no context counts as relevant.
                hits as f32 / query_vocab.len() as f32 >= self.config.relevance_threshold
            })
            .count();

        Ok(relevant as f32 / contexts.len() as f32)
    }

    fn get_config(&self) -> RagasMetricConfig {
        RagasMetricConfig {
            name: "context_precision".to_string(),
            requires_ground_truth: false,
            requires_context: true,
            score_range: (0.0, 1.0),
            higher_is_better: true,
        }
    }
}
/// Word-overlap approximation of context recall: the share of ground-truth
/// sentences whose words are sufficiently covered by the retrieved contexts.
struct ContextRecallMetric {
    /// Supplies `sentence_similarity_threshold`; other fields are not read here.
    config: ContextRecallConfig,
}

impl ContextRecallMetric {
    /// Wraps the given configuration.
    fn new(config: ContextRecallConfig) -> Self {
        Self { config }
    }
}

impl RagasMetric for ContextRecallMetric {
    fn name(&self) -> &str {
        "context_recall"
    }

    fn metric_type(&self) -> RagasMetricType {
        RagasMetricType::ContextRecall
    }

    /// Returns the fraction of ground-truth sentences recalled by the contexts.
    ///
    /// The ground truth is split on `'.'` and blank fragments are discarded,
    /// so a trailing period no longer adds a phantom sentence to the
    /// denominator. Note that splitting on `'.'` also breaks decimals and
    /// abbreviations apart. A sentence is recalled when the share of its
    /// distinct lowercased words found in the joined contexts is at least
    /// `config.sentence_similarity_threshold`.
    ///
    /// Special cases, checked in this order:
    /// * no ground truth: `0.5`
    /// * no contexts: `0.0`
    /// * ground truth without any non-blank sentence: `1.0` (nothing to recall)
    ///
    /// The query and answer are ignored.
    fn evaluate_query(
        &self,
        _query: &str,
        contexts: &[String],
        _answer: &str,
        ground_truth: Option<&str>,
    ) -> RragResult<f32> {
        let ground_truth = match ground_truth {
            Some(gt) => gt,
            None => return Ok(0.5),
        };
        if contexts.is_empty() {
            return Ok(0.0);
        }

        let gt_sentences: Vec<&str> = ground_truth
            .split('.')
            .map(str::trim)
            .filter(|sentence| !sentence.is_empty())
            .collect();
        if gt_sentences.is_empty() {
            return Ok(1.0);
        }

        // The context vocabulary does not depend on the sentence; build it once.
        let context_lower = contexts.join(" ").to_lowercase();
        let context_words: std::collections::HashSet<&str> =
            context_lower.split_whitespace().collect();

        let recalled_sentences = gt_sentences
            .iter()
            .filter(|sentence| {
                let sentence_lower = sentence.to_lowercase();
                let sentence_words: std::collections::HashSet<&str> =
                    sentence_lower.split_whitespace().collect();
                let overlap = sentence_words
                    .iter()
                    .filter(|word| context_words.contains(*word))
                    .count();
                // Non-blank sentences always contain at least one word.
                let similarity = overlap as f32 / sentence_words.len() as f32;
                similarity >= self.config.sentence_similarity_threshold
            })
            .count();

        Ok(recalled_sentences as f32 / gt_sentences.len() as f32)
    }

    fn get_config(&self) -> RagasMetricConfig {
        RagasMetricConfig {
            name: "context_recall".to_string(),
            requires_ground_truth: true,
            requires_context: true,
            score_range: (0.0, 1.0),
            higher_is_better: true,
        }
    }
}
/// Word-overlap approximation of context relevancy: intersection over union
/// of the query's word set and the joined contexts' word set.
struct ContextRelevancyMetric {
    /// Stored configuration; the scoring code below does not read it.
    config: ContextRelevancyConfig,
}

impl ContextRelevancyMetric {
    /// Wraps the given configuration.
    fn new(config: ContextRelevancyConfig) -> Self {
        ContextRelevancyMetric { config }
    }
}

impl RagasMetric for ContextRelevancyMetric {
    fn name(&self) -> &str {
        "context_relevancy"
    }

    fn metric_type(&self) -> RagasMetricType {
        RagasMetricType::ContextRelevancy
    }

    /// Returns `|query words ∩ context words| / |query words ∪ context words|`.
    ///
    /// All contexts are joined with spaces before tokenizing. Words are
    /// lowercased, whitespace-separated tokens. Returns `0.0` when the query
    /// is empty, there are no contexts, or neither side contains a word.
    /// The answer and ground truth are ignored.
    fn evaluate_query(
        &self,
        query: &str,
        contexts: &[String],
        _answer: &str,
        _ground_truth: Option<&str>,
    ) -> RragResult<f32> {
        if query.is_empty() || contexts.is_empty() {
            return Ok(0.0);
        }

        let lowered_query = query.to_lowercase();
        let lowered_context = contexts.join(" ").to_lowercase();
        let query_vocab: std::collections::HashSet<&str> =
            lowered_query.split_whitespace().collect();
        let context_vocab: std::collections::HashSet<&str> =
            lowered_context.split_whitespace().collect();

        let shared = query_vocab
            .iter()
            .filter(|word| context_vocab.contains(*word))
            .count();
        // |A ∪ B| = |A| + |B| - |A ∩ B|
        let combined = query_vocab.len() + context_vocab.len() - shared;

        if combined == 0 {
            Ok(0.0)
        } else {
            Ok(shared as f32 / combined as f32)
        }
    }

    fn get_config(&self) -> RagasMetricConfig {
        RagasMetricConfig {
            name: "context_relevancy".to_string(),
            requires_ground_truth: false,
            requires_context: true,
            score_range: (0.0, 1.0),
            higher_is_better: true,
        }
    }
}
/// Word-overlap approximation of answer similarity between the generated
/// answer and the ground-truth answer.
struct AnswerSimilarityMetric {
    /// Supplies `similarity_method`; the weight fields are not read here.
    config: AnswerSimilarityConfig,
}

impl AnswerSimilarityMetric {
    /// Wraps the given configuration.
    fn new(config: AnswerSimilarityConfig) -> Self {
        Self { config }
    }

    /// Intersection over union of the lowercased, whitespace-separated word
    /// sets of `answer` and `ground_truth`; `0.0` when neither has a word.
    fn word_jaccard(answer: &str, ground_truth: &str) -> f32 {
        let answer_lower = answer.to_lowercase();
        let answer_words: std::collections::HashSet<&str> =
            answer_lower.split_whitespace().collect();
        let gt_lower = ground_truth.to_lowercase();
        let gt_words: std::collections::HashSet<&str> = gt_lower.split_whitespace().collect();

        let intersection = answer_words.intersection(&gt_words).count();
        let union = answer_words.union(&gt_words).count();
        if union == 0 {
            0.0
        } else {
            intersection as f32 / union as f32
        }
    }
}

impl RagasMetric for AnswerSimilarityMetric {
    fn name(&self) -> &str {
        "answer_similarity"
    }

    fn metric_type(&self) -> RagasMetricType {
        RagasMetricType::AnswerSimilarity
    }

    /// Returns the word-set Jaccard similarity of answer and ground truth.
    ///
    /// Returns `0.0` when the ground truth is missing, or when either the
    /// answer or the ground truth is empty. The query and contexts are
    /// ignored.
    fn evaluate_query(
        &self,
        _query: &str,
        _contexts: &[String],
        answer: &str,
        ground_truth: Option<&str>,
    ) -> RragResult<f32> {
        let ground_truth = match ground_truth {
            Some(gt) => gt,
            None => return Ok(0.0),
        };
        if answer.is_empty() || ground_truth.is_empty() {
            return Ok(0.0);
        }

        // Every method currently resolves to the same word-set Jaccard score;
        // no dedicated cosine, BLEU or ROUGE computation exists in this file.
        // The match is exhaustive so that a new `SimilarityMethod` variant
        // has to be handled here explicitly.
        match self.config.similarity_method {
            SimilarityMethod::Cosine
            | SimilarityMethod::Jaccard
            | SimilarityMethod::Bleu
            | SimilarityMethod::Rouge => Ok(Self::word_jaccard(answer, ground_truth)),
        }
    }

    fn get_config(&self) -> RagasMetricConfig {
        RagasMetricConfig {
            name: "answer_similarity".to_string(),
            requires_ground_truth: true,
            requires_context: false,
            score_range: (0.0, 1.0),
            higher_is_better: true,
        }
    }
}
/// Word-overlap approximation of answer correctness: a weighted sum of
/// ground-truth word coverage and word-set Jaccard similarity.
struct AnswerCorrectnessMetric {
    /// Supplies `factual_weight` and `semantic_weight`; `use_fact_checking`
    /// is not read here.
    config: AnswerCorrectnessConfig,
}

impl AnswerCorrectnessMetric {
    /// Wraps the given configuration.
    fn new(config: AnswerCorrectnessConfig) -> Self {
        Self { config }
    }
}

impl RagasMetric for AnswerCorrectnessMetric {
    fn name(&self) -> &str {
        "answer_correctness"
    }

    fn metric_type(&self) -> RagasMetricType {
        RagasMetricType::AnswerCorrectness
    }

    /// Returns `factual * factual_weight + semantic * semantic_weight`, capped at `1.0`.
    ///
    /// * `factual` is the share of distinct ground-truth words present in the answer.
    /// * `semantic` is the intersection over union of the two word sets.
    ///
    /// Words are lowercased, whitespace-separated tokens. Returns `0.0` when
    /// the ground truth is missing, or when either the answer or the ground
    /// truth is empty. The query and contexts are ignored.
    fn evaluate_query(
        &self,
        _query: &str,
        _contexts: &[String],
        answer: &str,
        ground_truth: Option<&str>,
    ) -> RragResult<f32> {
        let ground_truth = match ground_truth {
            Some(gt) => gt,
            None => return Ok(0.0),
        };
        if answer.is_empty() || ground_truth.is_empty() {
            return Ok(0.0);
        }

        let answer_lower = answer.to_lowercase();
        let answer_words: std::collections::HashSet<&str> =
            answer_lower.split_whitespace().collect();
        let gt_lower = ground_truth.to_lowercase();
        let gt_words: std::collections::HashSet<&str> = gt_lower.split_whitespace().collect();

        let intersection = answer_words.intersection(&gt_words).count();
        let union = answer_words.union(&gt_words).count();

        // Guard both ratios: a whitespace-only ground truth has no words.
        let factual_score = if gt_words.is_empty() {
            0.0
        } else {
            intersection as f32 / gt_words.len() as f32
        };
        let semantic_score = if union == 0 {
            0.0
        } else {
            intersection as f32 / union as f32
        };

        let correctness = factual_score * self.config.factual_weight
            + semantic_score * self.config.semantic_weight;
        // Weights are caller-supplied and may sum to more than 1.
        Ok(correctness.min(1.0))
    }

    fn get_config(&self) -> RagasMetricConfig {
        RagasMetricConfig {
            name: "answer_correctness".to_string(),
            requires_ground_truth: true,
            requires_context: false,
            score_range: (0.0, 1.0),
            higher_is_better: true,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An answer sharing some words with the context scores in `(0, 1]`.
    #[test]
    fn test_faithfulness_metric() {
        let metric = FaithfulnessMetric::new(FaithfulnessConfig::default());
        let retrieved = vec!["Machine learning is a subset of AI".to_string()];
        let generated = "Machine learning is part of artificial intelligence";

        let score = metric
            .evaluate_query("", &retrieved, generated, None)
            .unwrap();

        assert!(score > 0.0 && score <= 1.0);
    }

    /// An answer sharing words with the query gets a positive score.
    #[test]
    fn test_answer_relevancy_metric() {
        let metric = AnswerRelevancyMetric::new(AnswerRelevancyConfig::default());
        let question = "What is machine learning?";
        let generated = "Machine learning is a subset of artificial intelligence";

        let score = metric
            .evaluate_query(question, &[], generated, None)
            .unwrap();

        assert!(score > 0.0);
    }

    /// One matching and one unrelated context yield a score in `(0, 1]`.
    #[test]
    fn test_context_precision_metric() {
        let metric = ContextPrecisionMetric::new(ContextPrecisionConfig::default());
        let question = "machine learning";
        let retrieved = vec![
            "Machine learning is great".to_string(),
            "The weather is nice today".to_string(),
        ];

        let score = metric
            .evaluate_query(question, &retrieved, "", None)
            .unwrap();

        assert!(score > 0.0 && score <= 1.0);
    }

    /// The default configuration produces a named evaluator with metrics.
    #[test]
    fn test_ragas_evaluator_creation() {
        let evaluator = RagasEvaluator::new(RagasConfig::default());

        assert_eq!(evaluator.name(), "RAGAS");
        assert!(!evaluator.supported_metrics().is_empty());
    }
}