async_openai/types/graders/
grader.rs

1use serde::{Deserialize, Serialize};
2
3use crate::types::{evals::EvalItem, ReasoningEffort};
4
5/// String check operation.
6#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
7#[serde(rename_all = "lowercase")]
8pub enum GraderStringCheckOperation {
9    /// Equal.
10    Eq,
11    /// Not equal.
12    Ne,
13    /// Like.
14    Like,
15    /// Case-insensitive like.
16    Ilike,
17}
18
19/// String check grader.
20#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
21pub struct GraderStringCheck {
22    /// The name of the grader.
23    pub name: String,
24    /// The input text. This may include template strings.
25    pub input: String,
26    /// The reference text. This may include template strings.
27    pub reference: String,
28    /// The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
29    pub operation: GraderStringCheckOperation,
30}
31
32/// Text similarity grader.
33#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
34pub struct GraderTextSimilarity {
35    /// The name of the grader.
36    pub name: String,
37    /// The text being graded.
38    pub input: String,
39    /// The text being graded against.
40    pub reference: String,
41    /// The evaluation metric to use.
42    pub evaluation_metric: GraderTextSimilarityEvaluationMetric,
43}
44
45/// Text similarity metric.
46#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
47#[serde(rename_all = "snake_case")]
48pub enum GraderTextSimilarityEvaluationMetric {
49    Cosine,
50    FuzzyMatch,
51    Bleu,
52    Gleu,
53    Meteor,
54    Rouge1,
55    Rouge2,
56    Rouge3,
57    Rouge4,
58    Rouge5,
59    RougeL,
60}
61
62/// Python grader.
63#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
64pub struct GraderPython {
65    /// The name of the grader.
66    pub name: String,
67    /// The source code of the python script.
68    pub source: String,
69    /// The image tag to use for the python script.
70    pub image_tag: Option<String>,
71}
72
73/// Score model grader.
74#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
75pub struct GraderScoreModel {
76    /// The name of the grader.
77    pub name: String,
78    /// The model to use for the evaluation.
79    pub model: String,
80    /// A list of chat messages forming the prompt or context.
81    pub input: Vec<EvalItem>,
82
83    /// Optional sampling parameters.
84    #[serde(skip_serializing_if = "Option::is_none")]
85    pub sampling_params: Option<GraderScoreModelSamplingParams>,
86    /// The range of the score. Defaults to [0, 1].
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub range: Option<Vec<f64>>,
89}
90
91#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
92pub struct GraderScoreModelSamplingParams {
93    /// A seed value to initialize the randomness, during sampling.
94    #[serde(skip_serializing_if = "Option::is_none")]
95    pub seed: Option<i32>,
96    /// An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
97    #[serde(skip_serializing_if = "Option::is_none")]
98    pub top_p: Option<f64>,
99    /// A higher temperature increases randomness in the outputs.
100    #[serde(skip_serializing_if = "Option::is_none")]
101    pub temperature: Option<f64>,
102    /// The maximum number of tokens the grader model may generate in its response.
103    #[serde(skip_serializing_if = "Option::is_none")]
104    pub max_completion_tokens: Option<i32>,
105    /// Optional reasoning effort parameter.
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub reasoning_effort: Option<ReasoningEffort>,
108}
109
110#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
111pub struct GraderLabelModel {
112    /// The name of the grader.
113    pub name: String,
114    /// The model to use for the evaluation. Must support structured outputs.
115    pub model: String,
116    /// A list of chat messages forming the prompt or context.
117    pub input: Vec<EvalItem>,
118    /// The labels to classify to each item in the evaluation.
119    pub labels: Vec<String>,
120    /// The labels that indicate a passing result. Must be a subset of labels.
121    pub passing_labels: Vec<String>,
122}
123
124#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
125#[serde(tag = "type", rename_all = "snake_case")]
126pub enum Graders {
127    StringCheck(GraderStringCheck),
128    TextSimilarity(GraderTextSimilarity),
129    Python(GraderPython),
130    ScoreModel(GraderScoreModel),
131    LabelModel(GraderLabelModel),
132}
133
134#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
135pub struct GraderMulti {
136    /// The name of the grader.
137    pub name: String,
138    pub graders: Graders,
139    /// A formula to calculate the output based on grader results.
140    pub calculate_output: String,
141}