use serde_json::Value;
use std::collections::HashMap;
use std::time::Instant;
/// A scoring strategy that compares an expected JSON value with an actual one.
///
/// Implementations in this file return their `name` as the `metric_name` of
/// the scores they produce.
pub trait EvalMetric {
/// Returns the metric's short name.
fn name(&self) -> &str;
/// Returns a human-readable description of what the metric checks.
fn description(&self) -> &str;
/// Compares `expected` with `actual` and returns the resulting `EvalScore`.
fn evaluate(&self, expected: &Value, actual: &Value) -> EvalScore;
}
/// The outcome of applying one metric to an expected/actual pair.
#[derive(Debug, Clone)]
pub struct EvalScore {
/// Name of the metric that produced this score.
pub metric_name: String,
/// Numeric score; `EvalScore::new` clamps it into `0.0..=1.0`.
pub score: f64,
/// Extra key/value information attached through `with_detail`.
pub details: HashMap<String, Value>,
}
impl EvalScore {
    /// Creates a score for `metric_name` with an empty detail map.
    ///
    /// `score` is clamped into `0.0..=1.0`. A NaN input is stored as `0.0`:
    /// `f64::clamp` returns NaN unchanged, and a NaN score would propagate
    /// into every average computed from it and serialize as JSON `null`.
    pub fn new(metric_name: impl Into<String>, score: f64) -> Self {
        let score = if score.is_nan() {
            0.0
        } else {
            score.clamp(0.0, 1.0)
        };
        Self {
            metric_name: metric_name.into(),
            score,
            details: HashMap::new(),
        }
    }

    /// Stores `value` under `key` in the detail map (replacing any previous
    /// entry for that key) and returns the updated score.
    pub fn with_detail(mut self, key: impl Into<String>, value: Value) -> Self {
        self.details.insert(key.into(), value);
        self
    }

    /// Returns `true` when the score is greater than or equal to `threshold`.
    pub fn is_passing(&self, threshold: f64) -> bool {
        self.score >= threshold
    }

    /// Serializes the score as a JSON object with the keys `metric_name`,
    /// `score` and `details`.
    pub fn to_json(&self) -> Value {
        serde_json::json!({
            "metric_name": self.metric_name,
            "score": self.score,
            "details": self.details,
        })
    }
}
/// Metric that scores 1.0 when the expected and actual values are equal and
/// 0.0 otherwise.
#[derive(Debug, Clone, Default)]
pub struct ExactMatchMetric;
impl EvalMetric for ExactMatchMetric {
fn name(&self) -> &str {
"exact_match"
}
fn description(&self) -> &str {
"Checks exact equality between expected and actual values"
}
fn evaluate(&self, expected: &Value, actual: &Value) -> EvalScore {
let matched = expected == actual;
EvalScore::new("exact_match", if matched { 1.0 } else { 0.0 })
.with_detail("matched", serde_json::json!(matched))
}
}
/// Metric that checks whether the actual value, rendered as text, contains
/// the expected value as a substring.
#[derive(Debug, Clone)]
pub struct ContainsMetric {
/// When `true`, both sides are lowercased before the substring check.
pub case_insensitive: bool,
}
impl ContainsMetric {
pub fn new(case_insensitive: bool) -> Self {
Self { case_insensitive }
}
fn value_to_string(v: &Value) -> String {
match v {
Value::String(s) => s.clone(),
other => other.to_string(),
}
}
}
impl Default for ContainsMetric {
    /// The default metric matches case-sensitively.
    fn default() -> Self {
        Self {
            case_insensitive: false,
        }
    }
}
impl EvalMetric for ContainsMetric {
fn name(&self) -> &str {
"contains"
}
fn description(&self) -> &str {
"Checks if the actual output contains the expected value as a substring"
}
fn evaluate(&self, expected: &Value, actual: &Value) -> EvalScore {
let expected_str = Self::value_to_string(expected);
let actual_str = Self::value_to_string(actual);
let contained = if self.case_insensitive {
actual_str
.to_lowercase()
.contains(&expected_str.to_lowercase())
} else {
actual_str.contains(&expected_str)
};
EvalScore::new("contains", if contained { 1.0 } else { 0.0 })
.with_detail("contained", serde_json::json!(contained))
.with_detail("case_insensitive", serde_json::json!(self.case_insensitive))
}
}
/// Metric that scores how close two numeric values are, relative to a
/// tolerance.
#[derive(Debug, Clone)]
pub struct NumericDistanceMetric {
    /// Distance at which the score reaches 0.0; `new` stores it as a positive
    /// value of at least `f64::EPSILON`.
    pub tolerance: f64,
}

impl NumericDistanceMetric {
    /// Creates a metric from `tolerance`.
    ///
    /// The sign is discarded and the result is raised to at least
    /// `f64::EPSILON`, so the division by the tolerance in `evaluate` never
    /// divides by zero.
    pub fn new(tolerance: f64) -> Self {
        let magnitude = tolerance.abs();
        NumericDistanceMetric {
            tolerance: f64::EPSILON.max(magnitude),
        }
    }
}
impl EvalMetric for NumericDistanceMetric {
fn name(&self) -> &str {
"numeric_distance"
}
fn description(&self) -> &str {
"Computes normalized distance between numeric values within a tolerance"
}
fn evaluate(&self, expected: &Value, actual: &Value) -> EvalScore {
let expected_num = value_as_f64(expected);
let actual_num = value_as_f64(actual);
match (expected_num, actual_num) {
(Some(e), Some(a)) => {
let distance = (e - a).abs();
let score = (1.0 - distance / self.tolerance).clamp(0.0, 1.0);
EvalScore::new("numeric_distance", score)
.with_detail("expected", serde_json::json!(e))
.with_detail("actual", serde_json::json!(a))
.with_detail("distance", serde_json::json!(distance))
.with_detail("tolerance", serde_json::json!(self.tolerance))
}
_ => EvalScore::new("numeric_distance", 0.0)
.with_detail("error", serde_json::json!("non-numeric value")),
}
}
}
/// Extracts a finite `f64` from a JSON number or a numeric string.
///
/// Returns `None` for every other JSON type, for strings that do not parse as
/// an `f64`, and for non-finite results. `str::parse::<f64>` accepts spellings
/// such as `"NaN"` and `"inf"`; letting those through would make the distance
/// computation (for example `inf - inf`) produce NaN scores.
fn value_as_f64(v: &Value) -> Option<f64> {
    let parsed = match v {
        Value::Number(n) => n.as_f64(),
        Value::String(s) => s.parse::<f64>().ok(),
        _ => None,
    };
    parsed.filter(|number| number.is_finite())
}
/// Metric that recursively compares JSON objects and arrays and scores the
/// share of expected entries that the actual value matches.
#[derive(Debug, Clone, Default)]
pub struct JsonSimilarityMetric;
impl JsonSimilarityMetric {
    /// Returns `(matched, total)` for `expected` compared with `actual`.
    ///
    /// * Two objects: `total` is the number of expected keys; each key that
    ///   is also present in `actual` contributes the recursive similarity
    ///   ratio of its two values. Keys that exist only in `actual` are ignored.
    /// * Two arrays: `total` is the expected length; elements are compared
    ///   position by position, and positions missing from `actual` contribute
    ///   nothing.
    /// * An empty expected object/array yields `(1.0, 1.0)` when the actual
    ///   one is empty too, and `(0.0, 1.0)` otherwise.
    /// * Any other pairing yields `(1.0, 1.0)` when the values are equal and
    ///   `(0.0, 1.0)` when they are not.
    fn similarity(expected: &Value, actual: &Value) -> (f64, f64) {
        // All-or-nothing result for leaves and for empty containers.
        let leaf = |same: bool| if same { (1.0, 1.0) } else { (0.0, 1.0) };

        match (expected, actual) {
            (Value::Object(want), Value::Object(got)) if want.is_empty() => leaf(got.is_empty()),
            (Value::Array(want), Value::Array(got)) if want.is_empty() => leaf(got.is_empty()),
            (Value::Object(want), Value::Object(got)) => {
                let credit = want
                    .iter()
                    .filter_map(|(key, want_val)| got.get(key).map(|got_val| (want_val, got_val)))
                    .fold(0.0, |acc, (want_val, got_val)| {
                        let (hit, out_of) = Self::similarity(want_val, got_val);
                        acc + hit / out_of
                    });
                (credit, want.len() as f64)
            }
            (Value::Array(want), Value::Array(got)) => {
                // `zip` stops at the shorter side, so expected positions with
                // no actual counterpart add nothing to the credit.
                let credit = want
                    .iter()
                    .zip(got.iter())
                    .fold(0.0, |acc, (want_val, got_val)| {
                        let (hit, out_of) = Self::similarity(want_val, got_val);
                        acc + hit / out_of
                    });
                (credit, want.len() as f64)
            }
            _ => leaf(expected == actual),
        }
    }
}
impl EvalMetric for JsonSimilarityMetric {
    /// Returns the metric name, `"json_similarity"`.
    fn name(&self) -> &str {
        "json_similarity"
    }

    /// Returns a short description of the metric.
    fn description(&self) -> &str {
        "Compares JSON structures and scores based on matching keys and values"
    }

    /// Scores `matched / total` as computed by `similarity` (1.0 if `total`
    /// is zero) and records both numbers under the `matched` and `total`
    /// detail keys.
    fn evaluate(&self, expected: &Value, actual: &Value) -> EvalScore {
        let (matched, total) = Self::similarity(expected, actual);
        let ratio = if total != 0.0 { matched / total } else { 1.0 };
        let base = EvalScore::new("json_similarity", ratio);
        base.with_detail("matched", serde_json::json!(matched))
            .with_detail("total", serde_json::json!(total))
    }
}
/// A single evaluation case: an input together with the output expected for it.
#[derive(Debug, Clone)]
pub struct EvalCase {
/// Name of the case; copied into the result's `case_name` by `EvalSuite::run`.
pub name: String,
/// Value handed to the evaluator by `EvalSuite::run`.
pub input: Value,
/// Value every metric compares the evaluator's output against.
pub expected_output: Value,
/// Labels that `EvalSuite::by_tag` filters on.
pub tags: Vec<String>,
/// Free-form key/value data; only serialized by the code visible in this file.
pub metadata: HashMap<String, Value>,
}
impl EvalCase {
pub fn builder(name: impl Into<String>) -> EvalCaseBuilder {
EvalCaseBuilder {
name: name.into(),
input: Value::Null,
expected_output: Value::Null,
tags: Vec::new(),
metadata: HashMap::new(),
}
}
pub fn to_json(&self) -> Value {
serde_json::json!({
"name": self.name,
"input": self.input,
"expected_output": self.expected_output,
"tags": self.tags,
"metadata": self.metadata,
})
}
}
/// Builder for `EvalCase`, created by `EvalCase::builder`.
///
/// Each field mirrors the `EvalCase` field of the same name.
#[derive(Debug, Clone)]
pub struct EvalCaseBuilder {
name: String,
input: Value,
expected_output: Value,
tags: Vec<String>,
metadata: HashMap<String, Value>,
}
impl EvalCaseBuilder {
pub fn input(mut self, input: Value) -> Self {
self.input = input;
self
}
pub fn expected_output(mut self, expected: Value) -> Self {
self.expected_output = expected;
self
}
pub fn tag(mut self, tag: impl Into<String>) -> Self {
self.tags.push(tag.into());
self
}
pub fn metadata(mut self, key: impl Into<String>, value: Value) -> Self {
self.metadata.insert(key.into(), value);
self
}
pub fn build(self) -> EvalCase {
EvalCase {
name: self.name,
input: self.input,
expected_output: self.expected_output,
tags: self.tags,
metadata: self.metadata,
}
}
}
/// The outcome of running one case through the evaluator and every metric.
#[derive(Debug, Clone)]
pub struct EvalCaseResult {
/// Name of the case this result belongs to.
pub case_name: String,
/// One score per metric, in the order the metrics were supplied.
pub scores: Vec<EvalScore>,
/// Copy of the case input.
pub input: Value,
/// Copy of the case's expected output.
pub expected: Value,
/// Value the evaluator returned for the input.
pub actual: Value,
/// Whether the average score met the pass threshold applied by the run.
pub passed: bool,
}
impl EvalCaseResult {
pub fn avg_score(&self) -> f64 {
if self.scores.is_empty() {
return 0.0;
}
let sum: f64 = self.scores.iter().map(|s| s.score).sum();
sum / self.scores.len() as f64
}
pub fn to_json(&self) -> Value {
serde_json::json!({
"case_name": self.case_name,
"scores": self.scores.iter().map(|s| s.to_json()).collect::<Vec<_>>(),
"input": self.input,
"expected": self.expected,
"actual": self.actual,
"passed": self.passed,
"avg_score": self.avg_score(),
})
}
}
/// A named, ordered collection of evaluation cases.
#[derive(Debug, Clone)]
pub struct EvalSuite {
/// Name of the suite; copied into the report produced by `run`.
pub name: String,
// Cases in insertion order; `run` evaluates them in this order.
cases: Vec<EvalCase>,
}
impl EvalSuite {
pub fn new(name: impl Into<String>) -> Self {
Self {
name: name.into(),
cases: Vec::new(),
}
}
pub fn add_case(&mut self, case: EvalCase) {
self.cases.push(case);
}
pub fn case_count(&self) -> usize {
self.cases.len()
}
pub fn by_tag(&self, tag: &str) -> Vec<&EvalCase> {
self.cases
.iter()
.filter(|c| c.tags.iter().any(|t| t == tag))
.collect()
}
pub fn run(
&self,
evaluator: &dyn Fn(&Value) -> Value,
metrics: &[Box<dyn EvalMetric>],
) -> EvalReport {
let start = Instant::now();
let threshold = 0.5;
let mut results = Vec::with_capacity(self.cases.len());
for case in &self.cases {
let actual = evaluator(&case.input);
let scores: Vec<EvalScore> = metrics
.iter()
.map(|m| m.evaluate(&case.expected_output, &actual))
.collect();
let avg = if scores.is_empty() {
0.0
} else {
scores.iter().map(|s| s.score).sum::<f64>() / scores.len() as f64
};
results.push(EvalCaseResult {
case_name: case.name.clone(),
scores,
input: case.input.clone(),
expected: case.expected_output.clone(),
actual,
passed: avg >= threshold,
});
}
let overall_score = if results.is_empty() {
0.0
} else {
results.iter().map(|r| r.avg_score()).sum::<f64>() / results.len() as f64
};
let duration_ms = start.elapsed().as_millis() as u64;
EvalReport {
suite_name: self.name.clone(),
results,
overall_score,
duration_ms,
}
}
}
/// Aggregated outcome of running a whole suite.
#[derive(Debug, Clone)]
pub struct EvalReport {
/// Name of the suite that produced the report.
pub suite_name: String,
/// Per-case results, in the order the cases were run.
pub results: Vec<EvalCaseResult>,
/// Mean of the per-case average scores; 0.0 when there are no results.
pub overall_score: f64,
/// Elapsed time of the run in milliseconds.
pub duration_ms: u64,
}
impl EvalReport {
pub fn passing_rate(&self, threshold: f64) -> f64 {
if self.results.is_empty() {
return 0.0;
}
let passing = self
.results
.iter()
.filter(|r| r.avg_score() >= threshold)
.count();
passing as f64 / self.results.len() as f64
}
pub fn failures(&self, threshold: f64) -> Vec<&EvalCaseResult> {
self.results
.iter()
.filter(|r| r.avg_score() < threshold)
.collect()
}
pub fn to_json(&self) -> Value {
serde_json::json!({
"suite_name": self.suite_name,
"overall_score": self.overall_score,
"duration_ms": self.duration_ms,
"case_count": self.results.len(),
"results": self.results.iter().map(|r| r.to_json()).collect::<Vec<_>>(),
})
}
}
/// Unit tests for the scores, metrics, case builder, suite runner and report.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_eval_score_creation() {
        let score = EvalScore::new("test_metric", 0.75);
        assert_eq!(score.metric_name, "test_metric");
        assert!((score.score - 0.75).abs() < f64::EPSILON);
        assert!(score.details.is_empty());
    }

    #[test]
    fn test_eval_score_clamped_above() {
        let score = EvalScore::new("m", 1.5);
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_score_clamped_below() {
        let score = EvalScore::new("m", -0.5);
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_score_with_detail() {
        let score = EvalScore::new("m", 0.5).with_detail("key", json!("value"));
        assert_eq!(score.details["key"], json!("value"));
    }

    #[test]
    fn test_eval_score_is_passing_true() {
        let score = EvalScore::new("m", 0.8);
        assert!(score.is_passing(0.7));
        assert!(score.is_passing(0.8));
    }

    #[test]
    fn test_eval_score_is_passing_false() {
        let score = EvalScore::new("m", 0.3);
        assert!(!score.is_passing(0.5));
    }

    #[test]
    fn test_eval_score_to_json() {
        let score = EvalScore::new("m", 0.9).with_detail("info", json!("ok"));
        let j = score.to_json();
        assert_eq!(j["metric_name"], "m");
        assert_eq!(j["score"], 0.9);
        assert_eq!(j["details"]["info"], "ok");
    }

    #[test]
    fn test_exact_match_equal_strings() {
        let m = ExactMatchMetric;
        let score = m.evaluate(&json!("hello"), &json!("hello"));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_exact_match_not_equal_strings() {
        let m = ExactMatchMetric;
        let score = m.evaluate(&json!("hello"), &json!("world"));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_exact_match_equal_numbers() {
        let m = ExactMatchMetric;
        let score = m.evaluate(&json!(42), &json!(42));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_exact_match_different_types() {
        let m = ExactMatchMetric;
        let score = m.evaluate(&json!("42"), &json!(42));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_exact_match_equal_objects() {
        let m = ExactMatchMetric;
        let score = m.evaluate(&json!({"a": 1}), &json!({"a": 1}));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_exact_match_name() {
        let m = ExactMatchMetric;
        assert_eq!(m.name(), "exact_match");
        assert!(!m.description().is_empty());
    }

    #[test]
    fn test_contains_substring_found() {
        let m = ContainsMetric::new(false);
        let score = m.evaluate(&json!("world"), &json!("hello world"));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_contains_substring_not_found() {
        let m = ContainsMetric::new(false);
        let score = m.evaluate(&json!("xyz"), &json!("hello world"));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_contains_case_sensitive_fail() {
        let m = ContainsMetric::new(false);
        let score = m.evaluate(&json!("HELLO"), &json!("hello world"));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_contains_case_insensitive_pass() {
        let m = ContainsMetric::new(true);
        let score = m.evaluate(&json!("HELLO"), &json!("hello world"));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_contains_non_string_values() {
        // Real non-string JSON values: the number is rendered as `42` and the
        // array as its compact serialization `[1,42,3]` before matching.
        let m = ContainsMetric::new(false);
        let score = m.evaluate(&json!(42), &json!([1, 42, 3]));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
        let miss = m.evaluate(&json!(7), &json!([1, 42, 3]));
        assert!((miss.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_contains_name() {
        let m = ContainsMetric::new(false);
        assert_eq!(m.name(), "contains");
    }

    #[test]
    fn test_numeric_distance_exact() {
        let m = NumericDistanceMetric::new(10.0);
        let score = m.evaluate(&json!(5.0), &json!(5.0));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_numeric_distance_within_tolerance() {
        let m = NumericDistanceMetric::new(10.0);
        let score = m.evaluate(&json!(10.0), &json!(15.0));
        assert!((score.score - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_numeric_distance_at_tolerance() {
        let m = NumericDistanceMetric::new(10.0);
        let score = m.evaluate(&json!(0.0), &json!(10.0));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_numeric_distance_beyond_tolerance() {
        let m = NumericDistanceMetric::new(10.0);
        let score = m.evaluate(&json!(0.0), &json!(20.0));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_numeric_distance_non_numeric() {
        let m = NumericDistanceMetric::new(10.0);
        let score = m.evaluate(&json!("abc"), &json!(5.0));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_numeric_distance_string_numbers() {
        let m = NumericDistanceMetric::new(10.0);
        let score = m.evaluate(&json!("5.0"), &json!("5.0"));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_numeric_distance_name() {
        let m = NumericDistanceMetric::new(1.0);
        assert_eq!(m.name(), "numeric_distance");
    }

    #[test]
    fn test_numeric_distance_zero_tolerance() {
        let m = NumericDistanceMetric::new(0.0);
        assert!(m.tolerance > 0.0);
    }

    #[test]
    fn test_json_similarity_identical_objects() {
        let m = JsonSimilarityMetric;
        let v = json!({"a": 1, "b": "hello"});
        let score = m.evaluate(&v, &v);
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_partial_match() {
        let m = JsonSimilarityMetric;
        let expected = json!({"a": 1, "b": 2});
        let actual = json!({"a": 1, "b": 99});
        let score = m.evaluate(&expected, &actual);
        assert!((score.score - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_missing_keys() {
        let m = JsonSimilarityMetric;
        let expected = json!({"a": 1, "b": 2});
        let actual = json!({"a": 1});
        let score = m.evaluate(&expected, &actual);
        assert!((score.score - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_arrays() {
        let m = JsonSimilarityMetric;
        let expected = json!([1, 2, 3]);
        let actual = json!([1, 2, 99]);
        let score = m.evaluate(&expected, &actual);
        assert!((score.score - 2.0 / 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_json_similarity_scalar_match() {
        let m = JsonSimilarityMetric;
        let score = m.evaluate(&json!(42), &json!(42));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_scalar_mismatch() {
        let m = JsonSimilarityMetric;
        let score = m.evaluate(&json!(42), &json!(99));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_nested_objects() {
        let m = JsonSimilarityMetric;
        let expected = json!({"a": {"x": 1, "y": 2}});
        let actual = json!({"a": {"x": 1, "y": 99}});
        let score = m.evaluate(&expected, &actual);
        assert!((score.score - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_empty_objects() {
        let m = JsonSimilarityMetric;
        let score = m.evaluate(&json!({}), &json!({}));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_name() {
        let m = JsonSimilarityMetric;
        assert_eq!(m.name(), "json_similarity");
    }

    #[test]
    fn test_eval_case_builder_basic() {
        let case = EvalCase::builder("test_case")
            .input(json!("input"))
            .expected_output(json!("output"))
            .build();
        assert_eq!(case.name, "test_case");
        assert_eq!(case.input, json!("input"));
        assert_eq!(case.expected_output, json!("output"));
        assert!(case.tags.is_empty());
        assert!(case.metadata.is_empty());
    }

    #[test]
    fn test_eval_case_builder_with_tags() {
        let case = EvalCase::builder("c")
            .input(json!(null))
            .expected_output(json!(null))
            .tag("math")
            .tag("easy")
            .build();
        assert_eq!(case.tags, vec!["math", "easy"]);
    }

    #[test]
    fn test_eval_case_builder_with_metadata() {
        let case = EvalCase::builder("c")
            .input(json!(null))
            .expected_output(json!(null))
            .metadata("difficulty", json!("hard"))
            .build();
        assert_eq!(case.metadata["difficulty"], json!("hard"));
    }

    #[test]
    fn test_eval_case_to_json() {
        let case = EvalCase::builder("c")
            .input(json!("in"))
            .expected_output(json!("out"))
            .tag("t")
            .build();
        let j = case.to_json();
        assert_eq!(j["name"], "c");
        assert_eq!(j["input"], "in");
        assert_eq!(j["expected_output"], "out");
        assert_eq!(j["tags"][0], "t");
    }

    #[test]
    fn test_eval_suite_new() {
        let suite = EvalSuite::new("test_suite");
        assert_eq!(suite.name, "test_suite");
        assert_eq!(suite.case_count(), 0);
    }

    #[test]
    fn test_eval_suite_add_case() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("c1")
                .input(json!(1))
                .expected_output(json!(1))
                .build(),
        );
        suite.add_case(
            EvalCase::builder("c2")
                .input(json!(2))
                .expected_output(json!(2))
                .build(),
        );
        assert_eq!(suite.case_count(), 2);
    }

    #[test]
    fn test_eval_suite_by_tag() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("c1")
                .tag("math")
                .input(json!(1))
                .expected_output(json!(1))
                .build(),
        );
        suite.add_case(
            EvalCase::builder("c2")
                .tag("text")
                .input(json!(2))
                .expected_output(json!(2))
                .build(),
        );
        suite.add_case(
            EvalCase::builder("c3")
                .tag("math")
                .input(json!(3))
                .expected_output(json!(3))
                .build(),
        );
        let math_cases = suite.by_tag("math");
        assert_eq!(math_cases.len(), 2);
        assert_eq!(math_cases[0].name, "c1");
        assert_eq!(math_cases[1].name, "c3");
    }

    #[test]
    fn test_eval_suite_by_tag_no_match() {
        let suite = EvalSuite::new("s");
        assert!(suite.by_tag("nonexistent").is_empty());
    }

    #[test]
    fn test_eval_suite_run_all_pass() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("c1")
                .input(json!("a"))
                .expected_output(json!("a"))
                .build(),
        );
        suite.add_case(
            EvalCase::builder("c2")
                .input(json!(42))
                .expected_output(json!(42))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert_eq!(report.results.len(), 2);
        assert!((report.overall_score - 1.0).abs() < f64::EPSILON);
        assert!(report.results.iter().all(|r| r.passed));
    }

    #[test]
    fn test_eval_suite_run_all_fail() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("c1")
                .input(json!("a"))
                .expected_output(json!("b"))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert!((report.overall_score - 0.0).abs() < f64::EPSILON);
        assert!(!report.results[0].passed);
    }

    #[test]
    fn test_eval_suite_run_mixed() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("pass")
                .input(json!("x"))
                .expected_output(json!("x"))
                .build(),
        );
        suite.add_case(
            EvalCase::builder("fail")
                .input(json!("x"))
                .expected_output(json!("y"))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert!((report.overall_score - 0.5).abs() < f64::EPSILON);
        assert!(report.results[0].passed);
        assert!(!report.results[1].passed);
    }

    #[test]
    fn test_eval_suite_run_multiple_metrics() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("c1")
                .input(json!("hello world"))
                .expected_output(json!("hello world"))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![
            Box::new(ExactMatchMetric),
            Box::new(ContainsMetric::new(false)),
        ];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert_eq!(report.results[0].scores.len(), 2);
        assert!((report.results[0].avg_score() - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_suite_run_empty() {
        let suite = EvalSuite::new("empty");
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert_eq!(report.results.len(), 0);
        assert!((report.overall_score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_report_passing_rate() {
        let mut suite = EvalSuite::new("s");
        for i in 0..10 {
            let expected = if i < 7 { json!(i) } else { json!("wrong") };
            suite.add_case(
                EvalCase::builder(format!("c{}", i))
                    .input(json!(i))
                    .expected_output(expected)
                    .build(),
            );
        }
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert!((report.passing_rate(0.5) - 0.7).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_report_passing_rate_empty() {
        let suite = EvalSuite::new("s");
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|_| json!(null), &metrics);
        assert!((report.passing_rate(0.5) - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_report_failures() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("pass")
                .input(json!(1))
                .expected_output(json!(1))
                .build(),
        );
        suite.add_case(
            EvalCase::builder("fail")
                .input(json!(1))
                .expected_output(json!(2))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        let failures = report.failures(0.5);
        assert_eq!(failures.len(), 1);
        assert_eq!(failures[0].case_name, "fail");
    }

    #[test]
    fn test_eval_report_to_json() {
        let mut suite = EvalSuite::new("my_suite");
        suite.add_case(
            EvalCase::builder("c")
                .input(json!(1))
                .expected_output(json!(1))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        let j = report.to_json();
        assert_eq!(j["suite_name"], "my_suite");
        assert_eq!(j["case_count"], 1);
        assert!(j["overall_score"].is_number());
        assert!(j["duration_ms"].is_number());
        assert!(j["results"].is_array());
    }

    #[test]
    fn test_eval_report_duration() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("c")
                .input(json!(1))
                .expected_output(json!(1))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![Box::new(ExactMatchMetric)];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert!(report.duration_ms < 10_000);
    }

    #[test]
    fn test_eval_case_result_avg_score() {
        let result = EvalCaseResult {
            case_name: "c".into(),
            scores: vec![
                EvalScore::new("m1", 1.0),
                EvalScore::new("m2", 0.5),
                EvalScore::new("m3", 0.0),
            ],
            input: json!(null),
            expected: json!(null),
            actual: json!(null),
            passed: false,
        };
        assert!((result.avg_score() - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_case_result_avg_score_empty() {
        let result = EvalCaseResult {
            case_name: "c".into(),
            scores: vec![],
            input: json!(null),
            expected: json!(null),
            actual: json!(null),
            passed: false,
        };
        assert!((result.avg_score() - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_case_result_to_json() {
        let result = EvalCaseResult {
            case_name: "c".into(),
            scores: vec![EvalScore::new("m", 0.8)],
            input: json!("in"),
            expected: json!("exp"),
            actual: json!("act"),
            passed: true,
        };
        let j = result.to_json();
        assert_eq!(j["case_name"], "c");
        assert_eq!(j["passed"], true);
        assert!(j["avg_score"].is_number());
    }

    #[test]
    fn test_eval_score_nan_clamped() {
        // A NaN input must never count as passing.
        let score = EvalScore::new("m", f64::NAN);
        assert!(!score.is_passing(0.5));
    }

    #[test]
    fn test_contains_empty_expected() {
        let m = ContainsMetric::new(false);
        let score = m.evaluate(&json!(""), &json!("anything"));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_empty_arrays() {
        let m = JsonSimilarityMetric;
        let score = m.evaluate(&json!([]), &json!([]));
        assert!((score.score - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_json_similarity_actual_shorter_array() {
        let m = JsonSimilarityMetric;
        let score = m.evaluate(&json!([1, 2, 3]), &json!([1]));
        assert!((score.score - 1.0 / 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_numeric_distance_negative_numbers() {
        let m = NumericDistanceMetric::new(20.0);
        let score = m.evaluate(&json!(-10), &json!(10));
        assert!((score.score - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_eval_suite_run_no_metrics() {
        let mut suite = EvalSuite::new("s");
        suite.add_case(
            EvalCase::builder("c")
                .input(json!(1))
                .expected_output(json!(1))
                .build(),
        );
        let metrics: Vec<Box<dyn EvalMetric>> = vec![];
        let report = suite.run(&|input| input.clone(), &metrics);
        assert_eq!(report.results.len(), 1);
        assert!(!report.results[0].passed);
    }
}