//! Grader types (`async_openai/types/graders/grader.rs`).

1use serde::{Deserialize, Serialize};
2
3use crate::types::evals::EvalItem;
4use crate::types::graders::ReasoningEffort;
5
6/// String check operation.
7#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
8#[serde(rename_all = "lowercase")]
9pub enum GraderStringCheckOperation {
10    /// Equal.
11    Eq,
12    /// Not equal.
13    Ne,
14    /// Like.
15    Like,
16    /// Case-insensitive like.
17    Ilike,
18}
19
/// String check grader.
///
/// Compares `input` against `reference` using `operation`.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct GraderStringCheck {
    /// The name of the grader.
    pub name: String,
    /// The input text. This may include template strings.
    pub input: String,
    /// The reference text. This may include template strings.
    pub reference: String,
    /// The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
    pub operation: GraderStringCheckOperation,
}
32
/// Text similarity grader.
///
/// Scores `input` against `reference` with the chosen `evaluation_metric`.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct GraderTextSimilarity {
    /// The name of the grader.
    pub name: String,
    /// The text being graded.
    pub input: String,
    /// The text being graded against.
    pub reference: String,
    /// The evaluation metric to use.
    pub evaluation_metric: GraderTextSimilarityEvaluationMetric,
}
45
46/// Text similarity metric.
47#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
48#[serde(rename_all = "snake_case")]
49pub enum GraderTextSimilarityEvaluationMetric {
50    Cosine,
51    FuzzyMatch,
52    Bleu,
53    Gleu,
54    Meteor,
55    Rouge1,
56    Rouge2,
57    Rouge3,
58    Rouge4,
59    Rouge5,
60    RougeL,
61}
62
63/// Python grader.
64#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
65pub struct GraderPython {
66    /// The name of the grader.
67    pub name: String,
68    /// The source code of the python script.
69    pub source: String,
70    /// The image tag to use for the python script.
71    pub image_tag: Option<String>,
72}
73
/// Score model grader.
///
/// Uses a model to produce a numeric score for the given prompt/context.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct GraderScoreModel {
    /// The name of the grader.
    pub name: String,
    /// The model to use for the evaluation.
    pub model: String,
    /// A list of chat messages forming the prompt or context.
    pub input: Vec<EvalItem>,

    /// Optional sampling parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sampling_params: Option<GraderScoreModelSamplingParams>,
    /// The range of the score. Defaults to [0, 1].
    // NOTE(review): modeled as an open-ended Vec; presumably a two-element
    // [min, max] pair — confirm against the API spec.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub range: Option<Vec<f64>>,
}
91
/// Sampling parameters for the score-model grader.
///
/// All fields are optional and omitted from serialization when `None`.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct GraderScoreModelSamplingParams {
    /// A seed value to initialize the randomness, during sampling.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
    /// An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// A higher temperature increases randomness in the outputs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// The maximum number of tokens the grader model may generate in its response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<i32>,
    /// Optional reasoning effort parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
}
110
/// Label model grader.
///
/// Uses a model to classify each item into one of `labels`; items whose
/// label is in `passing_labels` count as passing.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct GraderLabelModel {
    /// The name of the grader.
    pub name: String,
    /// The model to use for the evaluation. Must support structured outputs.
    pub model: String,
    /// A list of chat messages forming the prompt or context.
    pub input: Vec<EvalItem>,
    /// The labels to classify to each item in the evaluation.
    pub labels: Vec<String>,
    /// The labels that indicate a passing result. Must be a subset of labels.
    pub passing_labels: Vec<String>,
}
124
/// A grader, discriminated by a `type` tag on the wire.
///
/// The tag values come from the variant names in `snake_case`
/// (e.g. `"type": "string_check"`).
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Graders {
    /// `type: "string_check"`.
    StringCheck(GraderStringCheck),
    /// `type: "text_similarity"`.
    TextSimilarity(GraderTextSimilarity),
    /// `type: "python"`.
    Python(GraderPython),
    /// `type: "score_model"`.
    ScoreModel(GraderScoreModel),
    /// `type: "label_model"`.
    LabelModel(GraderLabelModel),
}
134
/// Multi grader: combines grader output via a formula.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct GraderMulti {
    /// The name of the grader.
    pub name: String,
    // NOTE(review): despite the plural field name, this holds a single
    // tagged `Graders` value, not a collection/map of graders — verify
    // against the API's multi-grader schema.
    pub graders: Graders,
    /// A formula to calculate the output based on grader results.
    pub calculate_output: String,
}