Skip to main content

synaptic_eval/
lib.rs

1mod dataset;
2mod embedding_distance;
3mod evaluator;
4mod exact_match;
5mod json_validity;
6mod llm_judge;
7mod regex_match;
8
9pub use dataset::{evaluate, Dataset, DatasetItem};
10pub use embedding_distance::EmbeddingDistanceEvaluator;
11pub use evaluator::{EvalResult, Evaluator};
12pub use exact_match::ExactMatchEvaluator;
13pub use json_validity::JsonValidityEvaluator;
14pub use llm_judge::LLMJudgeEvaluator;
15pub use regex_match::RegexMatchEvaluator;
16
/// An expected string paired with the string that was actually produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EvalCase {
    /// The reference value.
    pub expected: String,
    /// The value to compare against `expected`.
    pub actual: String,
}

impl EvalCase {
    /// Builds a case from anything convertible into an owned `String`.
    ///
    /// Both arguments are converted eagerly, `expected` first.
    pub fn new(expected: impl Into<String>, actual: impl Into<String>) -> Self {
        let expected: String = expected.into();
        let actual: String = actual.into();
        EvalCase { expected, actual }
    }
}
31
32#[derive(Debug, Clone)]
33pub struct EvalReport {
34    pub total: usize,
35    pub passed: usize,
36    pub accuracy: f32,
37    pub results: Vec<EvalResult>,
38}
39
40impl EvalReport {
41    /// Create a report from legacy `EvalCase` values (results will be empty).
42    pub fn from_cases(cases: Vec<EvalCase>) -> Self {
43        let total = cases.len();
44        let passed = cases
45            .iter()
46            .filter(|case| case.expected == case.actual)
47            .count();
48        let accuracy = if total == 0 {
49            0.0
50        } else {
51            passed as f32 / total as f32
52        };
53        Self {
54            total,
55            passed,
56            accuracy,
57            results: Vec::new(),
58        }
59    }
60
61    /// Create a report from evaluator results.
62    pub fn from_results(results: Vec<EvalResult>) -> Self {
63        let total = results.len();
64        let passed = results.iter().filter(|r| r.passed).count();
65        let accuracy = if total == 0 {
66            0.0
67        } else {
68            passed as f32 / total as f32
69        };
70        Self {
71            total,
72            passed,
73            accuracy,
74            results,
75        }
76    }
77}