1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3
4use crate::error::OpenAIError;
5use crate::types::chat::{ChatCompletionTool, ImageDetail, InputAudio, ResponseFormat};
6use crate::types::graders::{
7 GraderLabelModel, GraderPython, GraderScoreModel, GraderStringCheck, GraderTextSimilarity,
8};
9use crate::types::responses::{ResponseTextParam, Tool};
10use crate::types::Metadata;
11
12pub use crate::types::responses::{EasyInputMessage, InputTextContent, ReasoningEffort};
14
15#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
22pub struct Eval {
23 pub object: String,
25 pub id: String,
27 pub name: String,
29 pub data_source_config: EvalDataSourceConfig,
31 pub testing_criteria: Vec<EvalTestingCriterion>,
33 pub created_at: u64,
35 pub metadata: Metadata,
36}
37
38#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
40#[serde(tag = "type", rename_all = "snake_case")]
41pub enum EvalDataSourceConfig {
42 Custom(EvalCustomDataSourceConfig),
44 Logs(EvalLogsDataSourceConfig),
46 #[serde(rename = "stored_completions")]
48 StoredCompletions(EvalStoredCompletionsDataSourceConfig),
49}
50
51#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
53pub struct EvalCustomDataSourceConfig {
54 #[serde(rename = "type")]
56 pub r#type: String,
57 pub schema: serde_json::Value,
59}
60
61#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
63pub struct EvalLogsDataSourceConfig {
64 #[serde(rename = "type")]
66 pub r#type: String,
67 #[serde(skip_serializing_if = "Option::is_none")]
69 pub metadata: Option<Metadata>,
70 pub schema: serde_json::Value,
72}
73
74#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
76pub struct EvalStoredCompletionsDataSourceConfig {
77 #[serde(rename = "type")]
79 pub r#type: String,
80 #[serde(skip_serializing_if = "Option::is_none")]
82 pub metadata: Option<Metadata>,
83 pub schema: serde_json::Value,
85}
86
87#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
89#[serde(tag = "type", rename_all = "snake_case")]
90pub enum EvalTestingCriterion {
91 LabelModel(EvalGraderLabelModel),
93 StringCheck(EvalGraderStringCheck),
95 TextSimilarity(EvalGraderTextSimilarity),
97 Python(EvalGraderPython),
99 ScoreModel(EvalGraderScoreModel),
101}
102
103#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
105#[serde(transparent)]
106pub struct EvalGraderLabelModel(pub GraderLabelModel);
107
108#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
110#[serde(transparent)]
111pub struct EvalGraderStringCheck(pub GraderStringCheck);
112
113#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
115pub struct EvalGraderTextSimilarity {
116 #[serde(flatten)]
117 pub grader: GraderTextSimilarity,
118 pub pass_threshold: f64,
119}
120
121#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
123#[serde(rename_all = "snake_case")]
124pub enum TextSimilarityMetric {
125 Cosine,
127 FuzzyMatch,
129 Bleu,
131 Gleu,
133 Meteor,
135 Rouge1,
137 Rouge2,
139 Rouge3,
141 Rouge4,
143 Rouge5,
145 RougeL,
147}
148
149#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
152pub struct EvalGraderPython {
153 #[serde(flatten)]
154 pub grader: GraderPython,
155 pub pass_threshold: Option<f64>,
156}
157
158#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
159pub struct SamplingParams {
160 #[serde(skip_serializing_if = "Option::is_none")]
162 pub seed: Option<i32>,
163 #[serde(skip_serializing_if = "Option::is_none")]
165 pub top_p: Option<f64>,
166 #[serde(skip_serializing_if = "Option::is_none")]
168 pub temperature: Option<f64>,
169 #[serde(skip_serializing_if = "Option::is_none")]
171 pub max_completion_tokens: Option<i32>,
172 #[serde(skip_serializing_if = "Option::is_none")]
174 pub reasoning_effort: Option<ReasoningEffort>,
175}
176
177#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
180pub struct EvalGraderScoreModel {
181 #[serde(flatten)]
182 pub grader: GraderScoreModel,
183 pub pass_threshold: Option<f64>,
185}
186
187#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
188pub struct EvalItem {
189 pub role: EvalItemRole,
192 pub content: EvalItemContent,
195}
196
197#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
199#[serde(rename_all = "lowercase")]
200pub enum EvalItemRole {
201 User,
203 Assistant,
205 System,
207 Developer,
209}
210
211#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
213pub struct EvalItemContentOutputText {
214 pub text: String,
216}
217
218#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
220pub struct EvalItemInputImage {
221 pub image_url: String,
223 #[serde(skip_serializing_if = "Option::is_none")]
226 pub detail: Option<ImageDetail>,
227}
228
229#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
232#[serde(untagged)]
233pub enum EvalItemContent {
234 Array(Vec<EvalItemContentItem>),
236 Single(EvalItemContentItem),
238}
239
240#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
242#[serde(tag = "type", rename_all = "snake_case")]
243pub enum EvalItemContentItem {
244 InputText(InputTextContent),
246 OutputText(EvalItemContentOutputText),
248 InputImage(EvalItemInputImage),
250 InputAudio(InputAudio),
252 #[serde(untagged)]
254 Text(String),
255}
256
257#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
259pub struct EvalList {
260 pub object: String,
262 pub data: Vec<Eval>,
264 pub first_id: Option<String>,
266 pub last_id: Option<String>,
268 pub has_more: bool,
270}
271
272#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
273#[builder(name = "CreateEvalRequestArgs")]
274#[builder(pattern = "mutable")]
275#[builder(setter(into, strip_option), default)]
276#[builder(derive(Debug))]
277#[builder(build_fn(error = "OpenAIError"))]
278pub struct CreateEvalRequest {
279 pub name: Option<String>,
281 pub data_source_config: CreateEvalDataSourceConfig,
284 pub testing_criteria: Vec<CreateEvalTestingCriterion>,
288 #[serde(skip_serializing_if = "Option::is_none")]
289 pub metadata: Option<Metadata>,
290}
291
292#[derive(Debug, Serialize, Clone, PartialEq)]
293#[serde(tag = "type", rename_all = "snake_case")]
294pub enum CreateEvalDataSourceConfig {
295 Custom(CreateEvalCustomDataSourceConfig),
300 Logs(CreateEvalLogsDataSourceConfig),
303}
304
305impl Default for CreateEvalDataSourceConfig {
306 fn default() -> Self {
307 Self::Custom(CreateEvalCustomDataSourceConfig::default())
308 }
309}
310
311#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
312#[builder(name = "CreateEvalCustomDataSourceConfigArgs")]
313#[builder(pattern = "mutable")]
314#[builder(setter(into, strip_option), default)]
315#[builder(derive(Debug))]
316#[builder(build_fn(error = "OpenAIError"))]
317pub struct CreateEvalCustomDataSourceConfig {
318 pub item_schema: serde_json::Value,
320 #[serde(skip_serializing_if = "Option::is_none")]
323 pub include_sample_schema: Option<bool>,
324}
325
326#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
328#[builder(name = "CreateEvalLogsDataSourceConfigArgs")]
329#[builder(pattern = "mutable")]
330#[builder(setter(into, strip_option), default)]
331#[builder(derive(Debug))]
332#[builder(build_fn(error = "OpenAIError"))]
333pub struct CreateEvalLogsDataSourceConfig {
334 #[serde(skip_serializing_if = "Option::is_none")]
336 pub metadata: Option<Metadata>,
337}
338
339#[derive(Debug, Serialize, Clone, PartialEq)]
340#[serde(tag = "type", rename_all = "snake_case")]
341pub enum CreateEvalTestingCriterion {
342 LabelModel(CreateEvalLabelModelGrader),
345 StringCheck(EvalGraderStringCheck),
348 TextSimilarity(EvalGraderTextSimilarity),
350 Python(EvalGraderPython),
352 ScoreModel(EvalGraderScoreModel),
354}
355
356#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
358#[builder(name = "CreateEvalLabelModelGraderArgs")]
359#[builder(pattern = "mutable")]
360#[builder(setter(into, strip_option), default)]
361#[builder(derive(Debug))]
362#[builder(build_fn(error = "OpenAIError"))]
363pub struct CreateEvalLabelModelGrader {
364 pub name: String,
366 pub model: String,
368 pub input: Vec<CreateEvalItem>,
371 pub labels: Vec<String>,
373 pub passing_labels: Vec<String>,
375}
376
377#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
378pub struct SimpleInputMessage {
379 pub role: String,
381 pub content: String,
383}
384
385#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
387#[serde(tag = "type", rename_all = "snake_case")]
388pub enum CreateEvalItem {
389 Message(EvalItem),
395
396 #[serde(untagged)]
398 Simple(SimpleInputMessage),
399}
400
401#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
403#[builder(name = "UpdateEvalRequestArgs")]
404#[builder(pattern = "mutable")]
405#[builder(setter(into, strip_option), default)]
406#[builder(derive(Debug))]
407#[builder(build_fn(error = "OpenAIError"))]
408pub struct UpdateEvalRequest {
409 #[serde(skip_serializing_if = "Option::is_none")]
411 pub name: Option<String>,
412 #[serde(skip_serializing_if = "Option::is_none")]
414 pub metadata: Option<Metadata>,
415}
416
417#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
419pub struct DeleteEvalResponse {
420 pub object: String,
422 pub deleted: bool,
424 pub eval_id: String,
426}
427
428#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
432pub struct EvalRun {
433 pub object: String,
435 pub id: String,
437 pub eval_id: String,
439 pub status: EvalRunStatus,
441 pub model: String,
443 pub name: String,
445 pub created_at: u64,
447 pub report_url: String,
449 pub result_counts: EvalRunResultCounts,
451 pub per_model_usage: Option<Vec<EvalRunModelUsage>>,
453 pub per_testing_criteria_results: Option<Vec<EvalRunTestingCriteriaResult>>,
455 pub data_source: EvalRunDataSource,
457 pub metadata: Metadata,
459 pub error: Option<EvalApiError>,
461}
462
463#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
465#[serde(rename_all = "snake_case")]
466pub enum EvalRunStatus {
467 Queued,
469 InProgress,
471 Completed,
473 Failed,
475 Canceled,
477}
478
479#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
481pub struct EvalRunResultCounts {
482 pub total: u32,
484 pub errored: u32,
486 pub failed: u32,
488 pub passed: u32,
490}
491
492#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
494pub struct EvalRunModelUsage {
495 pub model_name: String,
497 pub invocation_count: u32,
499 pub prompt_tokens: u32,
501 pub completion_tokens: u32,
503 pub total_tokens: u32,
505 pub cached_tokens: u32,
507}
508
509#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
511pub struct EvalRunTestingCriteriaResult {
512 pub testing_criteria: String,
514 pub passed: u32,
516 pub failed: u32,
518}
519
520#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
522#[serde(tag = "type", rename_all = "snake_case")]
523pub enum EvalRunDataSource {
524 Jsonl(CreateEvalJsonlRunDataSource),
526 Completions(CreateEvalCompletionsRunDataSource),
528 Responses(CreateEvalResponsesRunDataSource),
530}
531
532#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
534pub struct CreateEvalJsonlRunDataSource {
535 pub source: EvalJsonlSource,
537}
538
539#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
541#[serde(tag = "type", rename_all = "snake_case")]
542pub enum EvalJsonlSource {
543 FileContent(EvalJsonlFileContentSource),
545 FileId(EvalJsonlFileIdSource),
547}
548
549#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
551pub struct EvalJsonlFileContentSource {
552 pub content: Vec<EvalJsonlContentItem>,
554}
555
556#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
558pub struct EvalJsonlFileIdSource {
559 pub id: String,
561}
562
563#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
565pub struct EvalJsonlContentItem {
566 pub item: serde_json::Value,
568 #[serde(skip_serializing_if = "Option::is_none")]
570 pub sample: Option<serde_json::Value>,
571}
572
573#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
575pub struct CreateEvalCompletionsRunDataSource {
576 pub input_messages: EvalInputMessages,
580 #[serde(skip_serializing_if = "Option::is_none")]
582 pub sampling_params: Option<EvalSamplingParams>,
583 pub model: String,
585 pub source: EvalCompletionsSource,
587}
588
589#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
590pub struct TemplateInputMessages {
591 pub template: Vec<CreateEvalItem>,
594}
595
596#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
597pub struct ItemReference {
598 pub item_reference: String,
600}
601
602#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
604#[serde(tag = "type", rename_all = "snake_case")]
605pub enum EvalInputMessages {
606 Template(TemplateInputMessages),
608 ItemReference(ItemReference),
610}
611
612#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
614pub struct EvalSamplingParams {
615 #[serde(skip_serializing_if = "Option::is_none")]
617 pub seed: Option<i32>,
618 #[serde(skip_serializing_if = "Option::is_none")]
620 pub top_p: Option<f64>,
621 #[serde(skip_serializing_if = "Option::is_none")]
623 pub temperature: Option<f64>,
624 #[serde(skip_serializing_if = "Option::is_none")]
626 pub max_completion_tokens: Option<i32>,
627 #[serde(skip_serializing_if = "Option::is_none")]
629 pub reasoning_effort: Option<ReasoningEffort>,
630 #[serde(skip_serializing_if = "Option::is_none")]
632 pub response_format: Option<ResponseFormat>,
633 #[serde(skip_serializing_if = "Option::is_none")]
635 pub tools: Option<Vec<ChatCompletionTool>>,
636}
637
638#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
639pub struct EvalResponsesSamplingParams {
640 #[serde(skip_serializing_if = "Option::is_none")]
642 pub seed: Option<i32>,
643 #[serde(skip_serializing_if = "Option::is_none")]
645 pub top_p: Option<f64>,
646 #[serde(skip_serializing_if = "Option::is_none")]
648 pub temperature: Option<f64>,
649 #[serde(skip_serializing_if = "Option::is_none")]
651 pub max_completion_tokens: Option<u32>,
652 #[serde(skip_serializing_if = "Option::is_none")]
654 pub reasoning_effort: Option<ReasoningEffort>,
655 #[serde(skip_serializing_if = "Option::is_none")]
657 pub response_format: Option<ResponseFormat>,
658 #[serde(skip_serializing_if = "Option::is_none")]
660 pub tools: Option<Vec<Tool>>,
661 #[serde(skip_serializing_if = "Option::is_none")]
666 pub text: Option<ResponseTextParam>,
667}
668
669#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
671#[serde(tag = "type", rename_all = "snake_case")]
672pub enum EvalCompletionsSource {
673 FileContent(EvalJsonlFileContentSource),
675 FileId(EvalJsonlFileIdSource),
677 StoredCompletions(EvalStoredCompletionsSource),
679}
680
681#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
683pub struct EvalStoredCompletionsSource {
684 #[serde(skip_serializing_if = "Option::is_none")]
686 pub metadata: Option<Metadata>,
687 #[serde(skip_serializing_if = "Option::is_none")]
689 pub model: Option<String>,
690 #[serde(skip_serializing_if = "Option::is_none")]
692 pub created_after: Option<u64>,
693 #[serde(skip_serializing_if = "Option::is_none")]
695 pub created_before: Option<u64>,
696 #[serde(skip_serializing_if = "Option::is_none")]
698 pub limit: Option<i32>,
699}
700
701#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
703pub struct CreateEvalResponsesRunDataSource {
704 #[serde(skip_serializing_if = "Option::is_none")]
706 pub input_messages: Option<EvalInputMessages>,
707 #[serde(skip_serializing_if = "Option::is_none")]
709 pub sampling_params: Option<EvalResponsesSamplingParams>,
710 #[serde(skip_serializing_if = "Option::is_none")]
711 pub model: Option<String>,
712 pub source: EvalResponsesRunSource,
714}
715
716#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
718#[serde(tag = "type", rename_all = "snake_case")]
719pub enum EvalResponsesRunSource {
720 FileContent(EvalJsonlFileContentSource),
722 FileId(EvalJsonlFileIdSource),
724 Responses(EvalResponsesSource),
726}
727
728#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
730pub struct EvalResponsesSource {
731 #[serde(skip_serializing_if = "Option::is_none")]
733 pub metadata: Option<serde_json::Value>,
734 #[serde(skip_serializing_if = "Option::is_none")]
736 pub model: Option<String>,
737 #[serde(skip_serializing_if = "Option::is_none")]
739 pub instructions_search: Option<String>,
740 #[serde(skip_serializing_if = "Option::is_none")]
742 pub created_after: Option<u64>,
743 #[serde(skip_serializing_if = "Option::is_none")]
745 pub created_before: Option<u64>,
746 #[serde(skip_serializing_if = "Option::is_none")]
748 pub reasoning_effort: Option<ReasoningEffort>,
749 #[serde(skip_serializing_if = "Option::is_none")]
751 pub temperature: Option<f64>,
752 #[serde(skip_serializing_if = "Option::is_none")]
754 pub top_p: Option<f64>,
755 #[serde(skip_serializing_if = "Option::is_none")]
757 pub users: Option<Vec<String>>,
758 #[serde(skip_serializing_if = "Option::is_none")]
760 pub tools: Option<Vec<String>>,
761}
762
763#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
765pub struct EvalRunList {
766 pub object: String,
768 pub data: Vec<EvalRun>,
770 pub first_id: Option<String>,
772 pub last_id: Option<String>,
774 pub has_more: bool,
776}
777
778#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
780#[builder(name = "CreateEvalRunRequestArgs")]
781#[builder(pattern = "mutable")]
782#[builder(setter(into, strip_option), default)]
783#[builder(derive(Debug))]
784#[builder(build_fn(error = "OpenAIError"))]
785pub struct CreateEvalRunRequest {
786 #[serde(skip_serializing_if = "Option::is_none")]
788 pub name: Option<String>,
789 pub data_source: CreateEvalRunDataSource,
791 #[serde(skip_serializing_if = "Option::is_none")]
793 pub metadata: Option<Metadata>,
794}
795
796#[derive(Debug, Serialize, Clone, PartialEq)]
798#[serde(tag = "type", rename_all = "snake_case")]
799pub enum CreateEvalRunDataSource {
800 Jsonl(CreateEvalJsonlRunDataSource),
802 Completions(CreateEvalCompletionsRunDataSource),
804 Responses(CreateEvalResponsesRunDataSource),
806}
807
808impl Default for CreateEvalRunDataSource {
810 fn default() -> Self {
811 todo!()
812 }
813}
814
815#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
817pub struct DeleteEvalRunResponse {
818 pub object: String,
820 pub deleted: bool,
822 pub run_id: String,
824}
825
826#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
830pub struct EvalRunOutputItem {
831 pub object: String,
833 pub id: String,
835 pub run_id: String,
837 pub eval_id: String,
839 pub created_at: u64,
841 pub status: String,
843 pub datasource_item_id: u64,
845 pub datasource_item: serde_json::Value,
847 pub results: Vec<EvalRunOutputItemResult>,
849 pub sample: EvalRunOutputItemSample,
851}
852
853#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
855pub struct EvalRunOutputItemResult {
856 pub name: String,
858 pub score: f64,
860 pub passed: bool,
862 #[serde(skip_serializing_if = "Option::is_none")]
864 pub sample: Option<serde_json::Value>,
865}
866
867#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
868pub struct SimpleOutputMessage {
869 pub role: String,
870 pub content: String,
871}
872
873#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
875pub struct EvalRunOutputItemSample {
876 pub input: Vec<SimpleInputMessage>,
878 pub output: Vec<SimpleOutputMessage>,
880 pub finish_reason: String,
882 pub model: String,
884 pub usage: EvalRunOutputItemUsage,
886 pub error: Option<EvalApiError>,
888 pub temperature: f64,
890 pub max_completion_tokens: i32,
892 pub top_p: f64,
894 pub seed: i32,
896}
897
898#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
900pub struct EvalRunOutputItemUsage {
901 pub total_tokens: i32,
903 pub completion_tokens: i32,
905 pub prompt_tokens: i32,
907 pub cached_tokens: i32,
909}
910
911#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
913pub struct EvalRunOutputItemList {
914 pub object: String,
916 pub data: Vec<EvalRunOutputItem>,
918 pub first_id: Option<String>,
920 pub last_id: Option<String>,
922 pub has_more: bool,
924}
925
926#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
928pub struct EvalApiError {
929 pub code: String,
931 pub message: String,
933}