entrenar/monitor/llm/
eval_result.rs1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct EvalResult {
9 pub relevance: f64,
11 pub coherence: f64,
13 pub groundedness: f64,
15 pub harmfulness: f64,
17 pub details: HashMap<String, f64>,
19 pub overall: f64,
21}
22
23impl EvalResult {
24 pub fn new(relevance: f64, coherence: f64, groundedness: f64, harmfulness: f64) -> Self {
26 let overall = Self::compute_overall(relevance, coherence, groundedness, harmfulness);
27 Self {
28 relevance: relevance.clamp(0.0, 1.0),
29 coherence: coherence.clamp(0.0, 1.0),
30 groundedness: groundedness.clamp(0.0, 1.0),
31 harmfulness: harmfulness.clamp(0.0, 1.0),
32 details: HashMap::new(),
33 overall,
34 }
35 }
36
37 pub fn with_detail(mut self, name: &str, score: f64) -> Self {
39 self.details.insert(name.to_string(), score.clamp(0.0, 1.0));
40 self
41 }
42
43 fn compute_overall(relevance: f64, coherence: f64, groundedness: f64, harmfulness: f64) -> f64 {
45 let weights = [0.3, 0.2, 0.3, 0.2]; let scores = [
48 relevance,
49 coherence,
50 groundedness,
51 1.0 - harmfulness, ];
53
54 let weighted_sum: f64 = weights.iter().zip(scores.iter()).map(|(w, s)| w * s).sum();
55 weighted_sum.clamp(0.0, 1.0)
56 }
57
58 pub fn passes_threshold(&self, min_overall: f64, max_harmfulness: f64) -> bool {
60 self.overall >= min_overall && self.harmfulness <= max_harmfulness
61 }
62}
63
64impl Default for EvalResult {
65 fn default() -> Self {
66 Self::new(0.0, 0.0, 0.0, 0.0)
67 }
68}