1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
7pub mod prelude {
8 pub use crate::{
9 AiEvalDatasetKind, AiEvalError, AiEvalFailureMode, AiEvalJudgeKind, AiEvalKind,
10 AiEvalMetricKind, AiEvalOutcome, AiEvalRubricName, AiEvalRunId, AiEvalScore,
11 AiEvalTargetKind,
12 };
13}
14
// Generates a public newtype around `String` whose value is validated by
// `non_empty_text`: surrounding whitespace is trimmed and an empty result is
// rejected with `AiEvalError::Empty`.
//
// The expansion refers to `fmt`, `FromStr`, `AiEvalError` and `non_empty_text`
// by bare name, so all four must be in scope where the macro is invoked.
macro_rules! eval_text_newtype {
    ($name:ident) => {
        /// Trimmed, non-empty text value.
        #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        pub struct $name(String);

        impl $name {
            /// Builds a value from `value` after trimming surrounding whitespace.
            ///
            /// # Errors
            ///
            /// Returns [`AiEvalError::Empty`] when nothing remains after trimming.
            pub fn new(value: impl AsRef<str>) -> Result<Self, AiEvalError> {
                non_empty_text(value).map(Self)
            }

            /// Borrows the stored text.
            pub fn as_str(&self) -> &str {
                &self.0
            }

            /// Borrows the stored text; identical to [`Self::as_str`].
            pub fn value(&self) -> &str {
                self.as_str()
            }

            /// Consumes the wrapper and returns the owned text.
            pub fn into_string(self) -> String {
                self.0
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                self.as_str()
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                // `pad` honours width, fill, alignment and precision flags
                // (e.g. `{:>12}`), which `write_str` silently ignores. With no
                // flags the output is the stored text, unchanged.
                formatter.pad(self.as_str())
            }
        }

        impl FromStr for $name {
            type Err = AiEvalError;

            /// Parses with the same rules as [`Self::new`].
            fn from_str(value: &str) -> Result<Self, Self::Err> {
                Self::new(value)
            }
        }

        impl TryFrom<&str> for $name {
            type Error = AiEvalError;

            /// Converts with the same rules as [`Self::new`].
            fn try_from(value: &str) -> Result<Self, Self::Error> {
                Self::new(value)
            }
        }
    };
}
67
// Generates a public fieldless enum in which every variant is paired with a
// string label. The expansion provides:
//   * `ALL`     - every variant, in declaration order;
//   * `as_str`  - the variant's label;
//   * `Display` - writes the label;
//   * `FromStr` - matches the input, after `normalized_label`, against the labels.
//
// The expansion refers to `fmt`, `FromStr`, `AiEvalError` and `normalized_label`
// by bare name, so all four must be in scope where the macro is invoked.
macro_rules! eval_enum {
    ($name:ident { $($variant:ident => $label:literal),+ $(,)? }) => {
        /// Closed set of labelled variants; see `as_str` for the label of each.
        #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        pub enum $name {
            $($variant),+
        }

        impl $name {
            /// Every variant, in declaration order.
            pub const ALL: &'static [Self] = &[$(Self::$variant),+];

            /// Returns the label associated with this variant.
            pub const fn as_str(self) -> &'static str {
                match self {
                    $(Self::$variant => $label),+
                }
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                // `pad` honours width, fill, alignment and precision flags
                // (e.g. `{:<16}` in tabular output), which `write_str` silently
                // ignores. With no flags the output is exactly the label.
                formatter.pad(self.as_str())
            }
        }

        impl FromStr for $name {
            type Err = AiEvalError;

            /// Parses a label after normalising it with `normalized_label`.
            ///
            /// # Errors
            ///
            /// Propagates the error from `normalized_label`, and returns
            /// [`AiEvalError::UnknownLabel`] when the normalised text matches
            /// no variant's label.
            fn from_str(value: &str) -> Result<Self, Self::Err> {
                match normalized_label(value)?.as_str() {
                    $($label => Ok(Self::$variant),)+
                    _ => Err(AiEvalError::UnknownLabel),
                }
            }
        }
    };
}
103
104eval_text_newtype!(AiEvalRunId);
105eval_text_newtype!(AiEvalRubricName);
106
107#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
108pub struct AiEvalScore(f64);
109
110impl AiEvalScore {
111 pub fn new(value: f64) -> Result<Self, AiEvalError> {
112 if !value.is_finite() {
113 return Err(AiEvalError::NonFinite);
114 }
115 if !(0.0..=1.0).contains(&value) {
116 return Err(AiEvalError::OutOfRange);
117 }
118 Ok(Self(value))
119 }
120
121 pub const fn value(self) -> f64 {
122 self.0
123 }
124}
125
126eval_enum!(AiEvalKind {
127 PromptEval => "prompt-eval",
128 ResponseEval => "response-eval",
129 ConversationEval => "conversation-eval",
130 ToolUseEval => "tool-use-eval",
131 AgentEval => "agent-eval",
132 RagEval => "rag-eval",
133 SafetyEval => "safety-eval",
134 RegressionEval => "regression-eval",
135 HumanEval => "human-eval",
136 Custom => "custom",
137});
138
139eval_enum!(AiEvalTargetKind {
140 Prompt => "prompt",
141 ModelResponse => "model-response",
142 Conversation => "conversation",
143 Agent => "agent",
144 ToolCall => "tool-call",
145 RagPipeline => "rag-pipeline",
146 Guardrail => "guardrail",
147 Memory => "memory",
148 Custom => "custom",
149});
150
151eval_enum!(AiEvalJudgeKind {
152 Human => "human",
153 Model => "model",
154 Rule => "rule",
155 Heuristic => "heuristic",
156 GoldenAnswer => "golden-answer",
157 Pairwise => "pairwise",
158 Consensus => "consensus",
159 Custom => "custom",
160});
161
162eval_enum!(AiEvalMetricKind {
163 Helpfulness => "helpfulness",
164 Correctness => "correctness",
165 Faithfulness => "faithfulness",
166 Groundedness => "groundedness",
167 Relevance => "relevance",
168 InstructionFollowing => "instruction-following",
169 Safety => "safety",
170 RefusalQuality => "refusal-quality",
171 Toxicity => "toxicity",
172 Bias => "bias",
173 CitationQuality => "citation-quality",
174 ToolUseCorrectness => "tool-use-correctness",
175 Latency => "latency",
176 Cost => "cost",
177 Custom => "custom",
178});
179
180eval_enum!(AiEvalDatasetKind {
181 GoldenSet => "golden-set",
182 RedTeamSet => "red-team-set",
183 RegressionSet => "regression-set",
184 ConversationSet => "conversation-set",
185 RetrievalSet => "retrieval-set",
186 Synthetic => "synthetic",
187 ProductionSample => "production-sample",
188 Custom => "custom",
189});
190
191eval_enum!(AiEvalOutcome {
192 Passed => "passed",
193 Failed => "failed",
194 Warning => "warning",
195 Inconclusive => "inconclusive",
196 Error => "error",
197});
198
199eval_enum!(AiEvalFailureMode {
200 Hallucination => "hallucination",
201 UngroundedAnswer => "ungrounded-answer",
202 BadCitation => "bad-citation",
203 ToolError => "tool-error",
204 UnsafeOutput => "unsafe-output",
205 PolicyViolation => "policy-violation",
206 RefusalFailure => "refusal-failure",
207 OverRefusal => "over-refusal",
208 FormatFailure => "format-failure",
209 Unknown => "unknown",
210});
211
/// Reasons a value in this file can be rejected during construction or parsing.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AiEvalError {
    /// The text was empty once surrounding whitespace was removed.
    Empty,
    /// The score was NaN or infinite.
    NonFinite,
    /// The score was finite but outside `0.0..=1.0`.
    OutOfRange,
    /// The text did not match any label of the enum being parsed.
    UnknownLabel,
}

impl fmt::Display for AiEvalError {
    /// Writes a fixed, human-readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Select the message first so there is a single write call.
        let message = match *self {
            AiEvalError::Empty => "AI eval metadata text cannot be empty",
            AiEvalError::NonFinite => "AI eval score must be finite",
            AiEvalError::OutOfRange => "AI eval score must be in 0.0..=1.0",
            AiEvalError::UnknownLabel => "unknown AI eval metadata label",
        };
        formatter.write_str(message)
    }
}

// No underlying cause to expose, so the default `Error` methods suffice.
impl Error for AiEvalError {}
232
233fn non_empty_text(value: impl AsRef<str>) -> Result<String, AiEvalError> {
234 let trimmed = value.as_ref().trim();
235 if trimmed.is_empty() {
236 Err(AiEvalError::Empty)
237 } else {
238 Ok(trimmed.to_string())
239 }
240}
241
242fn normalized_label(value: &str) -> Result<String, AiEvalError> {
243 let trimmed = value.trim();
244 if trimmed.is_empty() {
245 Err(AiEvalError::Empty)
246 } else {
247 Ok(trimmed.to_ascii_lowercase().replace(['_', ' '], "-"))
248 }
249}
250
#[cfg(test)]
mod tests {
    use super::{
        AiEvalDatasetKind, AiEvalError, AiEvalFailureMode, AiEvalJudgeKind, AiEvalKind,
        AiEvalMetricKind, AiEvalOutcome, AiEvalRubricName, AiEvalRunId, AiEvalScore,
        AiEvalTargetKind,
    };
    use core::{fmt, str::FromStr};

    // Exercises one text newtype: construction with trimming, every accessor,
    // `Display`, `TryFrom<&str>`, `FromStr`, and rejection of empty input.
    // Expands to code that uses `?`, so it must be invoked inside a function
    // returning `Result<_, AiEvalError>`.
    macro_rules! assert_text_newtype {
        ($type:ty, $value:literal) => {{
            let value = <$type>::new(concat!(" ", $value, " "))?;
            assert_eq!(value.as_str(), $value);
            assert_eq!(value.value(), $value);
            assert_eq!(value.as_ref(), $value);
            assert_eq!(value.to_string(), $value);
            assert_eq!(<$type as TryFrom<&str>>::try_from($value)?, value);
            assert_eq!($value.parse::<$type>()?, value);
            assert_eq!(<$type>::new(""), Err(AiEvalError::Empty));
            // Last, because it consumes `value`.
            assert_eq!(value.into_string(), $value.to_string());
        }};
    }

    /// Checks that every variant round-trips through `Display`/`FromStr`, that
    /// parsing tolerates `_`/space separators, upper case and surrounding
    /// whitespace, and that blank or unrecognised input is rejected.
    fn assert_enum_family<T>(variants: &[T]) -> Result<(), AiEvalError>
    where
        T: Copy + Eq + fmt::Debug + fmt::Display + FromStr<Err = AiEvalError>,
    {
        assert!(!variants.is_empty());
        for variant in variants {
            let label = variant.to_string();
            assert_eq!(label.parse::<T>()?, *variant);
            assert_eq!(label.replace('-', "_").parse::<T>()?, *variant);
            assert_eq!(label.replace('-', " ").parse::<T>()?, *variant);
            assert_eq!(label.to_ascii_uppercase().parse::<T>()?, *variant);
            assert_eq!(format!("  {}\n", label).parse::<T>()?, *variant);
        }
        assert_eq!(
            "no-such-label".parse::<T>(),
            Err(AiEvalError::UnknownLabel)
        );
        assert_eq!("   ".parse::<T>(), Err(AiEvalError::Empty));
        Ok(())
    }

    #[test]
    fn validates_eval_text_newtypes() -> Result<(), AiEvalError> {
        assert_text_newtype!(AiEvalRunId, "eval-001");
        assert_text_newtype!(AiEvalRubricName, "helpfulness");
        assert_eq!(AiEvalRunId::new(" "), Err(AiEvalError::Empty));
        assert_eq!(AiEvalRubricName::new("\t\n"), Err(AiEvalError::Empty));
        assert_eq!("".parse::<AiEvalRunId>(), Err(AiEvalError::Empty));
        Ok(())
    }

    #[test]
    fn validates_eval_scores() -> Result<(), AiEvalError> {
        assert_eq!(AiEvalScore::new(0.0)?.value(), 0.0);
        assert_eq!(AiEvalScore::new(0.5)?.value(), 0.5);
        assert_eq!(AiEvalScore::new(1.0)?.value(), 1.0);
        assert_eq!(AiEvalScore::new(-0.1), Err(AiEvalError::OutOfRange));
        assert_eq!(AiEvalScore::new(1.1), Err(AiEvalError::OutOfRange));
        assert_eq!(AiEvalScore::new(f64::INFINITY), Err(AiEvalError::NonFinite));
        assert_eq!(
            AiEvalScore::new(f64::NEG_INFINITY),
            Err(AiEvalError::NonFinite)
        );
        assert_eq!(AiEvalScore::new(f64::NAN), Err(AiEvalError::NonFinite));
        Ok(())
    }

    #[test]
    fn displays_and_parses_eval_enums() -> Result<(), AiEvalError> {
        assert_enum_family(AiEvalKind::ALL)?;
        assert_enum_family(AiEvalTargetKind::ALL)?;
        assert_enum_family(AiEvalJudgeKind::ALL)?;
        assert_enum_family(AiEvalMetricKind::ALL)?;
        assert_enum_family(AiEvalDatasetKind::ALL)?;
        assert_enum_family(AiEvalOutcome::ALL)?;
        assert_enum_family(AiEvalFailureMode::ALL)?;
        assert_eq!(
            "tool use eval".parse::<AiEvalKind>()?,
            AiEvalKind::ToolUseEval
        );
        Ok(())
    }

    #[test]
    fn describes_errors() {
        assert_eq!(
            AiEvalError::Empty.to_string(),
            "AI eval metadata text cannot be empty"
        );
        assert_eq!(
            AiEvalError::NonFinite.to_string(),
            "AI eval score must be finite"
        );
        assert_eq!(
            AiEvalError::OutOfRange.to_string(),
            "AI eval score must be in 0.0..=1.0"
        );
        assert_eq!(
            AiEvalError::UnknownLabel.to_string(),
            "unknown AI eval metadata label"
        );
    }
}