1mod dataset;
2mod embedding_distance;
3mod evaluator;
4mod exact_match;
5mod json_validity;
6mod llm_judge;
7mod regex_match;
8
9pub use dataset::{evaluate, Dataset, DatasetItem};
10pub use embedding_distance::EmbeddingDistanceEvaluator;
11pub use evaluator::{EvalResult, Evaluator};
12pub use exact_match::ExactMatchEvaluator;
13pub use json_validity::JsonValidityEvaluator;
14pub use llm_judge::LLMJudgeEvaluator;
15pub use regex_match::RegexMatchEvaluator;
16
/// A single expected/actual pair of strings to be compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EvalCase {
    /// The reference string.
    pub expected: String,
    /// The string that was actually produced.
    pub actual: String,
}

impl EvalCase {
    /// Creates a case from anything convertible into an owned `String`.
    ///
    /// `expected` is converted before `actual`.
    pub fn new(expected: impl Into<String>, actual: impl Into<String>) -> Self {
        let expected: String = expected.into();
        let actual: String = actual.into();
        Self { expected, actual }
    }
}
31
32#[derive(Debug, Clone)]
33pub struct EvalReport {
34 pub total: usize,
35 pub passed: usize,
36 pub accuracy: f32,
37 pub results: Vec<EvalResult>,
38}
39
40impl EvalReport {
41 pub fn from_cases(cases: Vec<EvalCase>) -> Self {
43 let total = cases.len();
44 let passed = cases
45 .iter()
46 .filter(|case| case.expected == case.actual)
47 .count();
48 let accuracy = if total == 0 {
49 0.0
50 } else {
51 passed as f32 / total as f32
52 };
53 Self {
54 total,
55 passed,
56 accuracy,
57 results: Vec::new(),
58 }
59 }
60
61 pub fn from_results(results: Vec<EvalResult>) -> Self {
63 let total = results.len();
64 let passed = results.iter().filter(|r| r.passed).count();
65 let accuracy = if total == 0 {
66 0.0
67 } else {
68 passed as f32 / total as f32
69 };
70 Self {
71 total,
72 passed,
73 accuracy,
74 results,
75 }
76 }
77}