Skip to main content

use_ai_eval/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
7pub mod prelude {
8    pub use crate::{
9        AiEvalDatasetKind, AiEvalError, AiEvalFailureMode, AiEvalJudgeKind, AiEvalKind,
10        AiEvalMetricKind, AiEvalOutcome, AiEvalRubricName, AiEvalRunId, AiEvalScore,
11        AiEvalTargetKind,
12    };
13}
14
/// Generates a public newtype `$name` wrapping trimmed, non-empty text.
///
/// Construction always goes through `non_empty_text`, so surrounding
/// whitespace is removed and blank input is rejected with
/// `AiEvalError::Empty`. The generated type also implements `AsRef<str>`,
/// `Display`, `FromStr`, and `TryFrom<&str>`.
macro_rules! eval_text_newtype {
    ($name:ident) => {
        /// Text value that is guaranteed to be trimmed and non-empty.
        #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        pub struct $name(String);

        impl $name {
            /// Trims `value` and wraps the result.
            ///
            /// # Errors
            ///
            /// Returns [`AiEvalError::Empty`] when `value` is empty or
            /// consists only of whitespace.
            pub fn new(value: impl AsRef<str>) -> Result<Self, AiEvalError> {
                let text = non_empty_text(value)?;
                Ok(Self(text))
            }

            /// Borrows the stored text.
            pub fn as_str(&self) -> &str {
                self.0.as_str()
            }

            /// Borrows the stored text; same result as `as_str`.
            pub fn value(&self) -> &str {
                self.0.as_str()
            }

            /// Consumes the wrapper and returns the owned text.
            pub fn into_string(self) -> String {
                let Self(text) = self;
                text
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                &self.0
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // `write_str` emits the text verbatim; width/padding flags are not applied.
                f.write_str(&self.0)
            }
        }

        impl FromStr for $name {
            type Err = AiEvalError;

            fn from_str(text: &str) -> Result<Self, Self::Err> {
                non_empty_text(text).map(Self)
            }
        }

        impl TryFrom<&str> for $name {
            type Error = AiEvalError;

            fn try_from(text: &str) -> Result<Self, Self::Error> {
                text.parse()
            }
        }
    };
}
67
/// Generates a public fieldless enum `$name` whose variants each carry a
/// fixed string label.
///
/// The generated type exposes `ALL` (every variant in declaration order) and
/// `as_str` (the variant's label), and implements `Display` and `FromStr`.
/// Parsing first runs the input through `normalized_label`, then compares the
/// result against the labels exactly.
macro_rules! eval_enum {
    ($name:ident { $($variant:ident => $label:literal),+ $(,)? }) => {
        /// Label enum; declaration order defines the derived ordering.
        #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        pub enum $name {
            $($variant),+
        }

        impl $name {
            /// Every variant, in declaration order.
            pub const ALL: &'static [Self] = &[$(Self::$variant),+];

            /// Returns the label associated with this variant.
            pub const fn as_str(self) -> &'static str {
                match self {
                    $(Self::$variant => $label,)+
                }
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Writes the label verbatim; width/padding flags are not applied.
                f.write_str(Self::as_str(*self))
            }
        }

        impl FromStr for $name {
            type Err = AiEvalError;

            /// Parses a label after normalization.
            ///
            /// Fails with the error from `normalized_label` for blank input
            /// and with `AiEvalError::UnknownLabel` when no variant matches.
            fn from_str(text: &str) -> Result<Self, Self::Err> {
                let wanted = normalized_label(text)?;
                // The first variant whose label matches wins, in declaration order.
                Self::ALL
                    .iter()
                    .copied()
                    .find(|candidate| candidate.as_str() == wanted)
                    .ok_or(AiEvalError::UnknownLabel)
            }
        }
    };
}
103
// Text newtypes generated by `eval_text_newtype!`: each wraps a `String` that is
// trimmed on construction and rejected with `AiEvalError::Empty` when blank.
104eval_text_newtype!(AiEvalRunId);
105eval_text_newtype!(AiEvalRubricName);
106
107#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
108pub struct AiEvalScore(f64);
109
110impl AiEvalScore {
111    pub fn new(value: f64) -> Result<Self, AiEvalError> {
112        if !value.is_finite() {
113            return Err(AiEvalError::NonFinite);
114        }
115        if !(0.0..=1.0).contains(&value) {
116            return Err(AiEvalError::OutOfRange);
117        }
118        Ok(Self(value))
119    }
120
121    pub const fn value(self) -> f64 {
122        self.0
123    }
124}
125
// `AiEvalKind` is generated by `eval_enum!`: each variant is paired with the
// kebab-case label that `as_str`/`Display` emit and that `FromStr` accepts
// (after trimming, ASCII-lowercasing, and mapping `_`/space to `-`).
// Declaration order is significant: it fixes the derived `Ord` and the order of `ALL`.
126eval_enum!(AiEvalKind {
127    PromptEval => "prompt-eval",
128    ResponseEval => "response-eval",
129    ConversationEval => "conversation-eval",
130    ToolUseEval => "tool-use-eval",
131    AgentEval => "agent-eval",
132    RagEval => "rag-eval",
133    SafetyEval => "safety-eval",
134    RegressionEval => "regression-eval",
135    HumanEval => "human-eval",
136    Custom => "custom",
137});
138
// `AiEvalTargetKind` is generated by `eval_enum!`; the string on the right of
// each arrow is the variant's label for `as_str`, `Display`, and `FromStr`.
// The "custom" label also appears in other enums in this file; each enum is a
// distinct type and parses independently.
139eval_enum!(AiEvalTargetKind {
140    Prompt => "prompt",
141    ModelResponse => "model-response",
142    Conversation => "conversation",
143    Agent => "agent",
144    ToolCall => "tool-call",
145    RagPipeline => "rag-pipeline",
146    Guardrail => "guardrail",
147    Memory => "memory",
148    Custom => "custom",
149});
150
// `AiEvalJudgeKind` is generated by `eval_enum!`; variants map one-to-one to
// the labels below. Keep the order stable: the derived `Ord` and `ALL` follow it.
151eval_enum!(AiEvalJudgeKind {
152    Human => "human",
153    Model => "model",
154    Rule => "rule",
155    Heuristic => "heuristic",
156    GoldenAnswer => "golden-answer",
157    Pairwise => "pairwise",
158    Consensus => "consensus",
159    Custom => "custom",
160});
161
// `AiEvalMetricKind` is generated by `eval_enum!`; variants map one-to-one to
// the labels below. Labels must stay lowercase kebab-case, because `FromStr`
// compares them against input that has already been lowercased and had `_`
// and spaces replaced with `-`.
162eval_enum!(AiEvalMetricKind {
163    Helpfulness => "helpfulness",
164    Correctness => "correctness",
165    Faithfulness => "faithfulness",
166    Groundedness => "groundedness",
167    Relevance => "relevance",
168    InstructionFollowing => "instruction-following",
169    Safety => "safety",
170    RefusalQuality => "refusal-quality",
171    Toxicity => "toxicity",
172    Bias => "bias",
173    CitationQuality => "citation-quality",
174    ToolUseCorrectness => "tool-use-correctness",
175    Latency => "latency",
176    Cost => "cost",
177    Custom => "custom",
178});
179
// `AiEvalDatasetKind` is generated by `eval_enum!`; variants map one-to-one to
// the labels below, and `ALL` lists them in this declaration order.
180eval_enum!(AiEvalDatasetKind {
181    GoldenSet => "golden-set",
182    RedTeamSet => "red-team-set",
183    RegressionSet => "regression-set",
184    ConversationSet => "conversation-set",
185    RetrievalSet => "retrieval-set",
186    Synthetic => "synthetic",
187    ProductionSample => "production-sample",
188    Custom => "custom",
189});
190
// `AiEvalOutcome` is generated by `eval_enum!`; variants map one-to-one to the
// labels below. The `Error` variant is an ordinary enum variant and is not
// related to the `AiEvalError` type or the imported `std::error::Error` trait.
191eval_enum!(AiEvalOutcome {
192    Passed => "passed",
193    Failed => "failed",
194    Warning => "warning",
195    Inconclusive => "inconclusive",
196    Error => "error",
197});
198
// `AiEvalFailureMode` is generated by `eval_enum!`; variants map one-to-one to
// the labels below. Unlike the `*Kind` enums in this file, the last variant
// here is `Unknown` ("unknown") rather than `Custom`.
199eval_enum!(AiEvalFailureMode {
200    Hallucination => "hallucination",
201    UngroundedAnswer => "ungrounded-answer",
202    BadCitation => "bad-citation",
203    ToolError => "tool-error",
204    UnsafeOutput => "unsafe-output",
205    PolicyViolation => "policy-violation",
206    RefusalFailure => "refusal-failure",
207    OverRefusal => "over-refusal",
208    FormatFailure => "format-failure",
209    Unknown => "unknown",
210});
211
/// Validation and parsing failures reported by the types in this file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AiEvalError {
    /// The text was empty once surrounding whitespace was trimmed.
    Empty,
    /// The score was NaN or infinite.
    NonFinite,
    /// The score was finite but outside `0.0..=1.0`.
    OutOfRange,
    /// The normalized text matched none of the enum's labels.
    UnknownLabel,
}

impl fmt::Display for AiEvalError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the static message first, then emit it with a single write.
        let message = match *self {
            Self::Empty => "AI eval metadata text cannot be empty",
            Self::NonFinite => "AI eval score must be finite",
            Self::OutOfRange => "AI eval score must be in 0.0..=1.0",
            Self::UnknownLabel => "unknown AI eval metadata label",
        };
        f.write_str(message)
    }
}

impl Error for AiEvalError {}
232
233fn non_empty_text(value: impl AsRef<str>) -> Result<String, AiEvalError> {
234    let trimmed = value.as_ref().trim();
235    if trimmed.is_empty() {
236        Err(AiEvalError::Empty)
237    } else {
238        Ok(trimmed.to_string())
239    }
240}
241
242fn normalized_label(value: &str) -> Result<String, AiEvalError> {
243    let trimmed = value.trim();
244    if trimmed.is_empty() {
245        Err(AiEvalError::Empty)
246    } else {
247        Ok(trimmed.to_ascii_lowercase().replace(['_', ' '], "-"))
248    }
249}
250
#[cfg(test)]
mod tests {
    //! Unit tests for the text newtypes, the score type, the label enums, and
    //! the error type, covering both the success and the error paths.

    use super::{
        AiEvalDatasetKind, AiEvalError, AiEvalFailureMode, AiEvalJudgeKind, AiEvalKind,
        AiEvalMetricKind, AiEvalOutcome, AiEvalRubricName, AiEvalRunId, AiEvalScore,
        AiEvalTargetKind,
    };
    use core::{fmt, str::FromStr};

    /// Checks every accessor and conversion of a text newtype against
    /// `$value`, constructing it from a whitespace-padded copy to prove that
    /// trimming happens.
    macro_rules! assert_text_newtype {
        ($type:ty, $value:literal) => {{
            let value = <$type>::new(concat!(" ", $value, " "))?;
            assert_eq!(value.as_str(), $value);
            assert_eq!(value.value(), $value);
            assert_eq!(value.as_ref(), $value);
            assert_eq!(value.to_string(), $value);
            assert_eq!(<$type as TryFrom<&str>>::try_from($value)?, value);
            assert_eq!($value.parse::<$type>()?, value);
            // Blank input must be rejected through every entry point.
            assert_eq!(<$type>::new(""), Err(AiEvalError::Empty));
            assert_eq!(" \t".parse::<$type>(), Err(AiEvalError::Empty));
            assert_eq!(
                <$type as TryFrom<&str>>::try_from("  "),
                Err(AiEvalError::Empty)
            );
            assert_eq!(value.into_string(), $value.to_string());
        }};
    }

    /// Round-trips every variant through `Display` and `FromStr`, including
    /// the `_`-separated, space-separated, and upper-case spellings.
    fn assert_enum_family<T>(variants: &[T]) -> Result<(), AiEvalError>
    where
        T: Copy + Eq + fmt::Debug + fmt::Display + FromStr<Err = AiEvalError>,
    {
        assert!(!variants.is_empty());
        for variant in variants {
            let label = variant.to_string();
            assert_eq!(label.parse::<T>()?, *variant);
            assert_eq!(label.replace('-', "_").parse::<T>()?, *variant);
            assert_eq!(label.replace('-', " ").parse::<T>()?, *variant);
            assert_eq!(label.to_ascii_uppercase().parse::<T>()?, *variant);
        }
        Ok(())
    }

    #[test]
    fn validates_eval_text_newtypes() -> Result<(), AiEvalError> {
        assert_text_newtype!(AiEvalRunId, "eval-001");
        assert_text_newtype!(AiEvalRubricName, "helpfulness");
        assert_eq!(AiEvalRunId::new("  "), Err(AiEvalError::Empty));
        Ok(())
    }

    #[test]
    fn validates_eval_scores() -> Result<(), AiEvalError> {
        assert_eq!(AiEvalScore::new(0.0)?.value(), 0.0);
        assert_eq!(AiEvalScore::new(1.0)?.value(), 1.0);
        assert_eq!(AiEvalScore::new(-0.1), Err(AiEvalError::OutOfRange));
        assert_eq!(AiEvalScore::new(1.1), Err(AiEvalError::OutOfRange));
        assert_eq!(AiEvalScore::new(f64::INFINITY), Err(AiEvalError::NonFinite));
        Ok(())
    }

    #[test]
    fn rejects_nan_and_negative_infinity_scores() {
        assert_eq!(AiEvalScore::new(f64::NAN), Err(AiEvalError::NonFinite));
        assert_eq!(
            AiEvalScore::new(f64::NEG_INFINITY),
            Err(AiEvalError::NonFinite)
        );
        // Finite but huge values are a range error, not a finiteness error.
        assert_eq!(AiEvalScore::new(f64::MAX), Err(AiEvalError::OutOfRange));
    }

    #[test]
    fn displays_and_parses_eval_enums() -> Result<(), AiEvalError> {
        assert_enum_family(AiEvalKind::ALL)?;
        assert_enum_family(AiEvalTargetKind::ALL)?;
        assert_enum_family(AiEvalJudgeKind::ALL)?;
        assert_enum_family(AiEvalMetricKind::ALL)?;
        assert_enum_family(AiEvalDatasetKind::ALL)?;
        assert_enum_family(AiEvalOutcome::ALL)?;
        assert_enum_family(AiEvalFailureMode::ALL)?;
        assert_eq!(
            "tool use eval".parse::<AiEvalKind>()?,
            AiEvalKind::ToolUseEval
        );
        assert_eq!("  RAG_Eval ".parse::<AiEvalKind>()?, AiEvalKind::RagEval);
        Ok(())
    }

    #[test]
    fn rejects_blank_and_unknown_enum_labels() {
        assert_eq!("".parse::<AiEvalKind>(), Err(AiEvalError::Empty));
        assert_eq!(" \t".parse::<AiEvalOutcome>(), Err(AiEvalError::Empty));
        assert_eq!(
            "no-such-label".parse::<AiEvalKind>(),
            Err(AiEvalError::UnknownLabel)
        );
        // Labels are per-enum: "passed" belongs to `AiEvalOutcome` only.
        assert_eq!(
            "passed".parse::<AiEvalKind>(),
            Err(AiEvalError::UnknownLabel)
        );
    }

    #[test]
    fn displays_eval_errors() {
        assert_eq!(
            AiEvalError::Empty.to_string(),
            "AI eval metadata text cannot be empty"
        );
        assert_eq!(
            AiEvalError::NonFinite.to_string(),
            "AI eval score must be finite"
        );
        assert_eq!(
            AiEvalError::OutOfRange.to_string(),
            "AI eval score must be in 0.0..=1.0"
        );
        assert_eq!(
            AiEvalError::UnknownLabel.to_string(),
            "unknown AI eval metadata label"
        );
    }
}