// use-ai-eval 0.0.1
//
// AI evaluation metadata primitives for RustUse.
// Documentation
#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]

use core::{fmt, str::FromStr};
use std::error::Error;

pub mod prelude {
    pub use crate::{
        AiEvalDatasetKind, AiEvalError, AiEvalFailureMode, AiEvalJudgeKind, AiEvalKind,
        AiEvalMetricKind, AiEvalOutcome, AiEvalRubricName, AiEvalRunId, AiEvalScore,
        AiEvalTargetKind,
    };
}

// Generates a public text newtype called `$name`.
//
// The generated type wraps a `String` that has been accepted by
// `non_empty_text`, and ships with borrowing accessors plus `AsRef<str>`,
// `Display`, `FromStr` and `TryFrom<&str>` implementations. All fallible
// entry points funnel into `new`, so they share one validation rule.
macro_rules! eval_text_newtype {
    ($name:ident) => {
        /// Validated text value; see [`Self::new`] for the accepted input.
        #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        pub struct $name(String);

        impl $name {
            /// Validates `value` with `non_empty_text` and wraps the result.
            ///
            /// # Errors
            ///
            /// Returns the error reported by `non_empty_text`.
            pub fn new(value: impl AsRef<str>) -> Result<Self, AiEvalError> {
                let text = non_empty_text(value)?;
                Ok(Self(text))
            }

            /// Borrows the stored text.
            pub fn as_str(&self) -> &str {
                self.0.as_str()
            }

            /// Borrows the stored text; same result as [`Self::as_str`].
            pub fn value(&self) -> &str {
                &self.0
            }

            /// Consumes the wrapper and hands back the owned `String`.
            pub fn into_string(self) -> String {
                let Self(text) = self;
                text
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                &self.0
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Written verbatim with `write_str`, so width/padding flags
                // in the format spec are not applied.
                f.write_str(&self.0)
            }
        }

        impl FromStr for $name {
            type Err = AiEvalError;

            fn from_str(text: &str) -> Result<Self, Self::Err> {
                Self::new(text)
            }
        }

        impl TryFrom<&str> for $name {
            type Error = AiEvalError;

            fn try_from(text: &str) -> Result<Self, Self::Error> {
                // Delegates through `FromStr`, which in turn calls `new`.
                text.parse()
            }
        }
    };
}

// Generates a public, field-less enum `$name` from `Variant => "label"` pairs.
//
// The generated type exposes every variant through `ALL` (in declaration
// order), maps each variant to its label with `as_str`, prints that label via
// `Display`, and parses labels back through `FromStr` after passing the input
// through `normalized_label`.
macro_rules! eval_enum {
    ($name:ident { $($variant:ident => $label:literal),+ $(,)? }) => {
        #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        pub enum $name {
            $($variant),+
        }

        impl $name {
            /// Every variant, in declaration order.
            pub const ALL: &'static [Self] = &[$($name::$variant),+];

            /// Returns the label declared for this variant.
            pub const fn as_str(self) -> &'static str {
                match self {
                    $($name::$variant => $label),+
                }
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Written verbatim with `write_str`, so width/padding flags
                // in the format spec are not applied.
                let label = self.as_str();
                f.write_str(label)
            }
        }

        impl FromStr for $name {
            type Err = AiEvalError;

            fn from_str(text: &str) -> Result<Self, Self::Err> {
                // Normalisation errors are reported before any lookup.
                let wanted = normalized_label(text)?;
                // First variant (in declaration order) whose label matches.
                Self::ALL
                    .iter()
                    .copied()
                    .find(|candidate| candidate.as_str() == wanted.as_str())
                    .ok_or(AiEvalError::UnknownLabel)
            }
        }
    };
}

// Text newtypes generated by `eval_text_newtype!`: construction trims the
// input and rejects text that is empty after trimming.
eval_text_newtype!(AiEvalRunId);
eval_text_newtype!(AiEvalRubricName);

/// A score restricted to the closed interval `0.0..=1.0`.
///
/// The inner field is private; [`AiEvalScore::new`] is the constructor
/// defined alongside it and performs the validation.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
pub struct AiEvalScore(f64);

impl AiEvalScore {
    /// Validates `value` and wraps it.
    ///
    /// # Errors
    ///
    /// * [`AiEvalError::NonFinite`] when `value` is NaN or infinite.
    /// * [`AiEvalError::OutOfRange`] when `value` is finite but outside
    ///   `0.0..=1.0`.
    pub fn new(value: f64) -> Result<Self, AiEvalError> {
        match value {
            // Finiteness is checked first so NaN/infinity never reach the
            // range comparison and always report `NonFinite`.
            candidate if !candidate.is_finite() => Err(AiEvalError::NonFinite),
            candidate if (0.0..=1.0).contains(&candidate) => Ok(Self(candidate)),
            _ => Err(AiEvalError::OutOfRange),
        }
    }

    /// Returns the wrapped `f64`.
    pub const fn value(self) -> f64 {
        self.0
    }
}

// Evaluation kinds. The right-hand literal is the label returned by `as_str`
// and `Display`; parsing goes through `normalized_label`, so it ignores ASCII
// case and accepts `_` or a space wherever the label has `-`.
eval_enum!(AiEvalKind {
    PromptEval => "prompt-eval",
    ResponseEval => "response-eval",
    ConversationEval => "conversation-eval",
    ToolUseEval => "tool-use-eval",
    AgentEval => "agent-eval",
    RagEval => "rag-eval",
    SafetyEval => "safety-eval",
    RegressionEval => "regression-eval",
    HumanEval => "human-eval",
    Custom => "custom",
});

// Evaluation target kinds, declared as `Variant => "label"` pairs; the label
// is the text used by `as_str`, `Display` and `FromStr`.
eval_enum!(AiEvalTargetKind {
    Prompt => "prompt",
    ModelResponse => "model-response",
    Conversation => "conversation",
    Agent => "agent",
    ToolCall => "tool-call",
    RagPipeline => "rag-pipeline",
    Guardrail => "guardrail",
    Memory => "memory",
    Custom => "custom",
});

// Judge kinds, declared as `Variant => "label"` pairs; the label is the text
// used by `as_str`, `Display` and `FromStr`.
eval_enum!(AiEvalJudgeKind {
    Human => "human",
    Model => "model",
    Rule => "rule",
    Heuristic => "heuristic",
    GoldenAnswer => "golden-answer",
    Pairwise => "pairwise",
    Consensus => "consensus",
    Custom => "custom",
});

// Metric kinds, declared as `Variant => "label"` pairs; the label is the text
// used by `as_str`, `Display` and `FromStr`.
eval_enum!(AiEvalMetricKind {
    Helpfulness => "helpfulness",
    Correctness => "correctness",
    Faithfulness => "faithfulness",
    Groundedness => "groundedness",
    Relevance => "relevance",
    InstructionFollowing => "instruction-following",
    Safety => "safety",
    RefusalQuality => "refusal-quality",
    Toxicity => "toxicity",
    Bias => "bias",
    CitationQuality => "citation-quality",
    ToolUseCorrectness => "tool-use-correctness",
    Latency => "latency",
    Cost => "cost",
    Custom => "custom",
});

// Dataset kinds, declared as `Variant => "label"` pairs; the label is the
// text used by `as_str`, `Display` and `FromStr`.
eval_enum!(AiEvalDatasetKind {
    GoldenSet => "golden-set",
    RedTeamSet => "red-team-set",
    RegressionSet => "regression-set",
    ConversationSet => "conversation-set",
    RetrievalSet => "retrieval-set",
    Synthetic => "synthetic",
    ProductionSample => "production-sample",
    Custom => "custom",
});

// Evaluation outcomes, declared as `Variant => "label"` pairs. Unlike the
// kind enums in this file, this set has no `Custom` variant.
eval_enum!(AiEvalOutcome {
    Passed => "passed",
    Failed => "failed",
    Warning => "warning",
    Inconclusive => "inconclusive",
    Error => "error",
});

// Failure modes, declared as `Variant => "label"` pairs. The last variant is
// `Unknown` rather than the `Custom` variant used by the kind enums.
eval_enum!(AiEvalFailureMode {
    Hallucination => "hallucination",
    UngroundedAnswer => "ungrounded-answer",
    BadCitation => "bad-citation",
    ToolError => "tool-error",
    UnsafeOutput => "unsafe-output",
    PolicyViolation => "policy-violation",
    RefusalFailure => "refusal-failure",
    OverRefusal => "over-refusal",
    FormatFailure => "format-failure",
    Unknown => "unknown",
});

/// Failures reported by the validating constructors and parsers in this file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AiEvalError {
    /// Text (or a label) was empty after trimming whitespace.
    Empty,
    /// A score was NaN or infinite.
    NonFinite,
    /// A finite score was outside `0.0..=1.0`.
    OutOfRange,
    /// A label did not match any variant of the requested enum.
    UnknownLabel,
}

impl fmt::Display for AiEvalError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the fixed message first, then emit it in a single write.
        let message = match *self {
            Self::Empty => "AI eval metadata text cannot be empty",
            Self::NonFinite => "AI eval score must be finite",
            Self::OutOfRange => "AI eval score must be in 0.0..=1.0",
            Self::UnknownLabel => "unknown AI eval metadata label",
        };
        f.write_str(message)
    }
}

// No source error to expose; the default `Error` methods are sufficient.
impl Error for AiEvalError {}

/// Trims surrounding whitespace from `value` and returns the remainder as an
/// owned `String`.
///
/// # Errors
///
/// Returns [`AiEvalError::Empty`] when nothing is left after trimming.
fn non_empty_text(value: impl AsRef<str>) -> Result<String, AiEvalError> {
    match value.as_ref().trim() {
        "" => Err(AiEvalError::Empty),
        text => Ok(text.to_owned()),
    }
}

/// Converts user-supplied label text into the canonical form used for
/// matching: trimmed, ASCII-lowercased, with every `_` and space replaced
/// by `-`.
///
/// # Errors
///
/// Returns [`AiEvalError::Empty`] when `value` is empty after trimming.
fn normalized_label(value: &str) -> Result<String, AiEvalError> {
    let label = value.trim();
    if label.is_empty() {
        return Err(AiEvalError::Empty);
    }

    // One pass over the characters: separators become '-', everything else
    // is ASCII-lowercased (non-ASCII characters are left untouched).
    let canonical: String = label
        .chars()
        .map(|ch| match ch {
            '_' | ' ' => '-',
            other => other.to_ascii_lowercase(),
        })
        .collect();
    Ok(canonical)
}

#[cfg(test)]
mod tests {
    use super::{
        AiEvalDatasetKind, AiEvalError, AiEvalFailureMode, AiEvalJudgeKind, AiEvalKind,
        AiEvalMetricKind, AiEvalOutcome, AiEvalRubricName, AiEvalRunId, AiEvalScore,
        AiEvalTargetKind,
    };
    use core::{fmt, str::FromStr};

    // Exercises every accessor and conversion of a text newtype. The input is
    // padded with spaces so trimming is covered as well. Must be expanded
    // inside a function returning `Result<_, AiEvalError>` because it uses `?`.
    macro_rules! assert_text_newtype {
        ($type:ty, $value:literal) => {{
            let value = <$type>::new(concat!(" ", $value, " "))?;
            assert_eq!(value.as_str(), $value);
            assert_eq!(value.value(), $value);
            assert_eq!(value.as_ref(), $value);
            assert_eq!(value.to_string(), $value);
            assert_eq!(<$type as TryFrom<&str>>::try_from($value)?, value);
            assert_eq!($value.parse::<$type>()?, value);
            // `into_string` consumes the value, so it has to come last.
            assert_eq!(value.into_string(), $value.to_string());
        }};
    }

    // Round-trips every variant through `Display` and `FromStr`, including
    // the `_`, space and upper-case spellings the parser accepts.
    fn assert_enum_family<T>(variants: &[T]) -> Result<(), AiEvalError>
    where
        T: Copy + Eq + fmt::Debug + fmt::Display + FromStr<Err = AiEvalError>,
    {
        for variant in variants {
            let label = variant.to_string();
            assert_eq!(label.parse::<T>()?, *variant);
            assert_eq!(label.replace('-', "_").parse::<T>()?, *variant);
            assert_eq!(label.replace('-', " ").parse::<T>()?, *variant);
            assert_eq!(label.to_ascii_uppercase().parse::<T>()?, *variant);
        }
        Ok(())
    }

    #[test]
    fn validates_eval_text_newtypes() -> Result<(), AiEvalError> {
        assert_text_newtype!(AiEvalRunId, "eval-001");
        assert_text_newtype!(AiEvalRubricName, "helpfulness");
        assert_eq!(AiEvalRunId::new("  "), Err(AiEvalError::Empty));
        assert_eq!(AiEvalRubricName::new(""), Err(AiEvalError::Empty));
        assert_eq!("\t\n".parse::<AiEvalRunId>(), Err(AiEvalError::Empty));
        Ok(())
    }

    #[test]
    fn validates_eval_scores() -> Result<(), AiEvalError> {
        assert_eq!(AiEvalScore::new(0.0)?.value(), 0.0);
        assert_eq!(AiEvalScore::new(0.5)?.value(), 0.5);
        assert_eq!(AiEvalScore::new(1.0)?.value(), 1.0);
        assert_eq!(AiEvalScore::new(-0.1), Err(AiEvalError::OutOfRange));
        assert_eq!(AiEvalScore::new(1.1), Err(AiEvalError::OutOfRange));
        assert_eq!(AiEvalScore::new(f64::INFINITY), Err(AiEvalError::NonFinite));
        assert_eq!(
            AiEvalScore::new(f64::NEG_INFINITY),
            Err(AiEvalError::NonFinite)
        );
        // NaN must be reported as non-finite rather than out-of-range.
        assert_eq!(AiEvalScore::new(f64::NAN), Err(AiEvalError::NonFinite));
        Ok(())
    }

    #[test]
    fn displays_and_parses_eval_enums() -> Result<(), AiEvalError> {
        assert_enum_family(AiEvalKind::ALL)?;
        assert_enum_family(AiEvalTargetKind::ALL)?;
        assert_enum_family(AiEvalJudgeKind::ALL)?;
        assert_enum_family(AiEvalMetricKind::ALL)?;
        assert_enum_family(AiEvalDatasetKind::ALL)?;
        assert_enum_family(AiEvalOutcome::ALL)?;
        assert_enum_family(AiEvalFailureMode::ALL)?;
        assert_eq!(
            "tool use eval".parse::<AiEvalKind>()?,
            AiEvalKind::ToolUseEval
        );
        assert_eq!(" PASSED ".parse::<AiEvalOutcome>()?, AiEvalOutcome::Passed);
        Ok(())
    }

    #[test]
    fn rejects_unknown_and_empty_labels() {
        assert_eq!(
            "not-a-kind".parse::<AiEvalKind>(),
            Err(AiEvalError::UnknownLabel)
        );
        // A doubled separator is not collapsed, so the label is unknown.
        assert_eq!(
            "tool  use eval".parse::<AiEvalKind>(),
            Err(AiEvalError::UnknownLabel)
        );
        assert_eq!("   ".parse::<AiEvalOutcome>(), Err(AiEvalError::Empty));
        assert_eq!("".parse::<AiEvalFailureMode>(), Err(AiEvalError::Empty));
    }

    #[test]
    fn displays_eval_errors() {
        assert_eq!(
            AiEvalError::Empty.to_string(),
            "AI eval metadata text cannot be empty"
        );
        assert_eq!(
            AiEvalError::NonFinite.to_string(),
            "AI eval score must be finite"
        );
        assert_eq!(
            AiEvalError::OutOfRange.to_string(),
            "AI eval score must be in 0.0..=1.0"
        );
        assert_eq!(
            AiEvalError::UnknownLabel.to_string(),
            "unknown AI eval metadata label"
        );
    }
}