1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3
4use crate::error::OpenAIError;
5use crate::types::graders::{
6 GraderLabelModel, GraderPython, GraderScoreModel, GraderStringCheck, GraderTextSimilarity,
7};
8use crate::types::responses::{ResponseTextParam, Tool};
9use crate::types::{ChatCompletionTool, ImageDetail, InputAudio, Metadata, ResponseFormat};
10
11pub use crate::types::responses::{EasyInputMessage, InputTextContent, ReasoningEffort};
13
14#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
21pub struct Eval {
22 pub object: String,
24 pub id: String,
26 pub name: String,
28 pub data_source_config: EvalDataSourceConfig,
30 pub testing_criteria: Vec<EvalTestingCriterion>,
32 pub created_at: u64,
34 pub metadata: Metadata,
35}
36
37#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
39#[serde(tag = "type", rename_all = "snake_case")]
40pub enum EvalDataSourceConfig {
41 Custom(EvalCustomDataSourceConfig),
43 Logs(EvalLogsDataSourceConfig),
45 #[serde(rename = "stored_completions")]
47 StoredCompletions(EvalStoredCompletionsDataSourceConfig),
48}
49
50#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
52pub struct EvalCustomDataSourceConfig {
53 #[serde(rename = "type")]
55 pub r#type: String,
56 pub schema: serde_json::Value,
58}
59
60#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
62pub struct EvalLogsDataSourceConfig {
63 #[serde(rename = "type")]
65 pub r#type: String,
66 #[serde(skip_serializing_if = "Option::is_none")]
68 pub metadata: Option<Metadata>,
69 pub schema: serde_json::Value,
71}
72
73#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
75pub struct EvalStoredCompletionsDataSourceConfig {
76 #[serde(rename = "type")]
78 pub r#type: String,
79 #[serde(skip_serializing_if = "Option::is_none")]
81 pub metadata: Option<Metadata>,
82 pub schema: serde_json::Value,
84}
85
86#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
88#[serde(tag = "type", rename_all = "snake_case")]
89pub enum EvalTestingCriterion {
90 LabelModel(EvalGraderLabelModel),
92 StringCheck(EvalGraderStringCheck),
94 TextSimilarity(EvalGraderTextSimilarity),
96 Python(EvalGraderPython),
98 ScoreModel(EvalGraderScoreModel),
100}
101
102#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
104#[serde(transparent)]
105pub struct EvalGraderLabelModel(pub GraderLabelModel);
106
107#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
109#[serde(transparent)]
110pub struct EvalGraderStringCheck(pub GraderStringCheck);
111
112#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
114pub struct EvalGraderTextSimilarity {
115 #[serde(flatten)]
116 pub grader: GraderTextSimilarity,
117 pub pass_threshold: f64,
118}
119
120#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
122#[serde(rename_all = "snake_case")]
123pub enum TextSimilarityMetric {
124 Cosine,
126 FuzzyMatch,
128 Bleu,
130 Gleu,
132 Meteor,
134 Rouge1,
136 Rouge2,
138 Rouge3,
140 Rouge4,
142 Rouge5,
144 RougeL,
146}
147
148#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
151pub struct EvalGraderPython {
152 #[serde(flatten)]
153 pub grader: GraderPython,
154 pub pass_threshold: Option<f64>,
155}
156
157#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
158pub struct SamplingParams {
159 #[serde(skip_serializing_if = "Option::is_none")]
161 pub seed: Option<i32>,
162 #[serde(skip_serializing_if = "Option::is_none")]
164 pub top_p: Option<f64>,
165 #[serde(skip_serializing_if = "Option::is_none")]
167 pub temperature: Option<f64>,
168 #[serde(skip_serializing_if = "Option::is_none")]
170 pub max_completion_tokens: Option<i32>,
171 #[serde(skip_serializing_if = "Option::is_none")]
173 pub reasoning_effort: Option<ReasoningEffort>,
174}
175
176#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
179pub struct EvalGraderScoreModel {
180 #[serde(flatten)]
181 pub grader: GraderScoreModel,
182 pub pass_threshold: Option<f64>,
184}
185
186#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
187pub struct EvalItem {
188 pub role: EvalItemRole,
191 pub content: EvalItemContent,
193}
194
195#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
197#[serde(rename_all = "lowercase")]
198pub enum EvalItemRole {
199 User,
201 Assistant,
203 System,
205 Developer,
207}
208
209#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
210pub struct OutputText {
211 pub text: String,
213}
214
215#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
216pub struct InputImage {
217 pub image_url: String,
219 #[serde(skip_serializing_if = "Option::is_none")]
222 pub detail: Option<ImageDetail>,
223}
224
225#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
227#[serde(tag = "type", rename_all = "snake_case")]
228pub enum EvalItemContent {
229 InputText(InputTextContent),
231 OutputText(OutputText),
233 InputImage(InputImage),
235 InputAudio(InputAudio),
237 Array(Vec<EvalItemContent>),
239 #[serde(untagged)]
240 Text(String),
242}
243
244#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
246pub struct EvalList {
247 pub object: String,
249 pub data: Vec<Eval>,
251 pub first_id: String,
253 pub last_id: String,
255 pub has_more: bool,
257}
258
259#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
260#[builder(name = "CreateEvalRequestArgs")]
261#[builder(pattern = "mutable")]
262#[builder(setter(into, strip_option), default)]
263#[builder(derive(Debug))]
264#[builder(build_fn(error = "OpenAIError"))]
265pub struct CreateEvalRequest {
266 pub name: Option<String>,
268 pub data_source_config: CreateEvalDataSourceConfig,
271 pub testing_criteria: Vec<CreateEvalTestingCriterion>,
275 #[serde(skip_serializing_if = "Option::is_none")]
276 pub metadata: Option<Metadata>,
277}
278
279#[derive(Debug, Serialize, Clone, PartialEq)]
280#[serde(tag = "type", rename_all = "snake_case")]
281pub enum CreateEvalDataSourceConfig {
282 Custom(CreateEvalCustomDataSourceConfig),
287 Logs(CreateEvalLogsDataSourceConfig),
290}
291
292impl Default for CreateEvalDataSourceConfig {
293 fn default() -> Self {
294 Self::Custom(CreateEvalCustomDataSourceConfig::default())
295 }
296}
297
298#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
299#[builder(name = "CreateEvalCustomDataSourceConfigArgs")]
300#[builder(pattern = "mutable")]
301#[builder(setter(into, strip_option), default)]
302#[builder(derive(Debug))]
303#[builder(build_fn(error = "OpenAIError"))]
304pub struct CreateEvalCustomDataSourceConfig {
305 pub item_schema: serde_json::Value,
307 #[serde(skip_serializing_if = "Option::is_none")]
310 pub include_sample_schema: Option<bool>,
311}
312
313#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
315#[builder(name = "CreateEvalLogsDataSourceConfigArgs")]
316#[builder(pattern = "mutable")]
317#[builder(setter(into, strip_option), default)]
318#[builder(derive(Debug))]
319#[builder(build_fn(error = "OpenAIError"))]
320pub struct CreateEvalLogsDataSourceConfig {
321 #[serde(skip_serializing_if = "Option::is_none")]
323 pub metadata: Option<Metadata>,
324}
325
326#[derive(Debug, Serialize, Clone, PartialEq)]
327#[serde(tag = "type", rename_all = "snake_case")]
328pub enum CreateEvalTestingCriterion {
329 LabelModel(CreateEvalLabelModelGrader),
332 StringCheck(EvalGraderStringCheck),
335 TextSimilarity(EvalGraderTextSimilarity),
337 Python(EvalGraderPython),
339 ScoreModel(EvalGraderScoreModel),
341}
342
343#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
345#[builder(name = "CreateEvalLabelModelGraderArgs")]
346#[builder(pattern = "mutable")]
347#[builder(setter(into, strip_option), default)]
348#[builder(derive(Debug))]
349#[builder(build_fn(error = "OpenAIError"))]
350pub struct CreateEvalLabelModelGrader {
351 pub name: String,
353 pub model: String,
355 pub input: Vec<CreateEvalItem>,
358 pub labels: Vec<String>,
360 pub passing_labels: Vec<String>,
362}
363
364#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
365pub struct SimpleInputMessage {
366 pub role: String,
368 pub content: String,
370}
371
372#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
374#[serde(tag = "type", rename_all = "snake_case")]
375pub enum CreateEvalItem {
376 Message(EvalItem),
382
383 #[serde(untagged)]
385 Simple(SimpleInputMessage),
386}
387
388#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
390#[builder(name = "UpdateEvalRequestArgs")]
391#[builder(pattern = "mutable")]
392#[builder(setter(into, strip_option), default)]
393#[builder(derive(Debug))]
394#[builder(build_fn(error = "OpenAIError"))]
395pub struct UpdateEvalRequest {
396 #[serde(skip_serializing_if = "Option::is_none")]
398 pub name: Option<String>,
399 #[serde(skip_serializing_if = "Option::is_none")]
401 pub metadata: Option<Metadata>,
402}
403
404#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
406pub struct DeleteEvalResponse {
407 pub object: String,
409 pub deleted: bool,
411 pub eval_id: String,
413}
414
415#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
419pub struct EvalRun {
420 pub object: String,
422 pub id: String,
424 pub eval_id: String,
426 pub status: EvalRunStatus,
428 pub model: String,
430 pub name: String,
432 pub created_at: u64,
434 pub report_url: String,
436 pub result_counts: EvalRunResultCounts,
438 pub per_model_usage: Option<Vec<EvalRunModelUsage>>,
440 pub per_testing_criteria_results: Option<Vec<EvalRunTestingCriteriaResult>>,
442 pub data_source: EvalRunDataSource,
444 pub metadata: Metadata,
446 pub error: Option<EvalApiError>,
448}
449
450#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
452#[serde(rename_all = "snake_case")]
453pub enum EvalRunStatus {
454 Queued,
456 InProgress,
458 Completed,
460 Failed,
462 Canceled,
464}
465
466#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
468pub struct EvalRunResultCounts {
469 pub total: u32,
471 pub errored: u32,
473 pub failed: u32,
475 pub passed: u32,
477}
478
479#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
481pub struct EvalRunModelUsage {
482 pub model_name: String,
484 pub invocation_count: u32,
486 pub prompt_tokens: u32,
488 pub completion_tokens: u32,
490 pub total_tokens: u32,
492 pub cached_tokens: u32,
494}
495
496#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
498pub struct EvalRunTestingCriteriaResult {
499 pub testing_criteria: String,
501 pub passed: u32,
503 pub failed: u32,
505}
506
507#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
509#[serde(tag = "type", rename_all = "snake_case")]
510pub enum EvalRunDataSource {
511 Jsonl(CreateEvalJsonlRunDataSource),
513 Completions(CreateEvalCompletionsRunDataSource),
515 Responses(CreateEvalResponsesRunDataSource),
517}
518
519#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
521pub struct CreateEvalJsonlRunDataSource {
522 pub source: EvalJsonlSource,
524}
525
526#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
528#[serde(tag = "type", rename_all = "snake_case")]
529pub enum EvalJsonlSource {
530 FileContent(EvalJsonlFileContentSource),
532 FileId(EvalJsonlFileIdSource),
534}
535
536#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
538pub struct EvalJsonlFileContentSource {
539 pub content: Vec<EvalJsonlContentItem>,
541}
542
543#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
545pub struct EvalJsonlFileIdSource {
546 pub id: String,
548}
549
550#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
552pub struct EvalJsonlContentItem {
553 pub item: serde_json::Value,
555 #[serde(skip_serializing_if = "Option::is_none")]
557 pub sample: Option<serde_json::Value>,
558}
559
560#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
562pub struct CreateEvalCompletionsRunDataSource {
563 pub input_messages: EvalInputMessages,
567 #[serde(skip_serializing_if = "Option::is_none")]
569 pub sampling_params: Option<EvalSamplingParams>,
570 pub model: String,
572 pub source: EvalCompletionsSource,
574}
575
576#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
577pub struct TemplateInputMessages {
578 pub template: Vec<CreateEvalItem>,
581}
582
583#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
584pub struct ItemReference {
585 pub item_reference: String,
587}
588
589#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
591#[serde(tag = "type", rename_all = "snake_case")]
592pub enum EvalInputMessages {
593 Template(TemplateInputMessages),
595 ItemReference(ItemReference),
597}
598
599#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
601pub struct EvalSamplingParams {
602 #[serde(skip_serializing_if = "Option::is_none")]
604 pub seed: Option<i32>,
605 #[serde(skip_serializing_if = "Option::is_none")]
607 pub top_p: Option<f64>,
608 #[serde(skip_serializing_if = "Option::is_none")]
610 pub temperature: Option<f64>,
611 #[serde(skip_serializing_if = "Option::is_none")]
613 pub max_completion_tokens: Option<i32>,
614 #[serde(skip_serializing_if = "Option::is_none")]
616 pub reasoning_effort: Option<ReasoningEffort>,
617 #[serde(skip_serializing_if = "Option::is_none")]
619 pub response_format: Option<ResponseFormat>,
620 #[serde(skip_serializing_if = "Option::is_none")]
622 pub tools: Option<Vec<ChatCompletionTool>>,
623}
624
625#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
626pub struct EvalResponsesSamplingParams {
627 #[serde(skip_serializing_if = "Option::is_none")]
629 pub seed: Option<i32>,
630 #[serde(skip_serializing_if = "Option::is_none")]
632 pub top_p: Option<f64>,
633 #[serde(skip_serializing_if = "Option::is_none")]
635 pub temperature: Option<f64>,
636 #[serde(skip_serializing_if = "Option::is_none")]
638 pub max_completion_tokens: Option<u32>,
639 #[serde(skip_serializing_if = "Option::is_none")]
641 pub reasoning_effort: Option<ReasoningEffort>,
642 #[serde(skip_serializing_if = "Option::is_none")]
644 pub response_format: Option<ResponseFormat>,
645 #[serde(skip_serializing_if = "Option::is_none")]
647 pub tools: Option<Vec<Tool>>,
648 #[serde(skip_serializing_if = "Option::is_none")]
653 pub text: Option<ResponseTextParam>,
654}
655
656#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
658#[serde(tag = "type", rename_all = "snake_case")]
659pub enum EvalCompletionsSource {
660 FileContent(EvalJsonlFileContentSource),
662 FileId(EvalJsonlFileIdSource),
664 StoredCompletions(EvalStoredCompletionsSource),
666}
667
668#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
670pub struct EvalStoredCompletionsSource {
671 #[serde(skip_serializing_if = "Option::is_none")]
673 pub metadata: Option<Metadata>,
674 #[serde(skip_serializing_if = "Option::is_none")]
676 pub model: Option<String>,
677 #[serde(skip_serializing_if = "Option::is_none")]
679 pub created_after: Option<i64>,
680 #[serde(skip_serializing_if = "Option::is_none")]
682 pub created_before: Option<i64>,
683 #[serde(skip_serializing_if = "Option::is_none")]
685 pub limit: Option<i32>,
686}
687
688#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
690pub struct CreateEvalResponsesRunDataSource {
691 #[serde(skip_serializing_if = "Option::is_none")]
693 pub input_messages: Option<EvalInputMessages>,
694 #[serde(skip_serializing_if = "Option::is_none")]
696 pub sampling_params: Option<EvalResponsesSamplingParams>,
697 #[serde(skip_serializing_if = "Option::is_none")]
698 pub model: Option<String>,
699 pub source: EvalResponsesRunSource,
701}
702
703#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
705#[serde(tag = "type", rename_all = "snake_case")]
706pub enum EvalResponsesRunSource {
707 FileContent(EvalJsonlFileContentSource),
709 FileId(EvalJsonlFileIdSource),
711 Responses(EvalResponsesSource),
713}
714
715#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
717pub struct EvalResponsesSource {
718 #[serde(skip_serializing_if = "Option::is_none")]
720 pub metadata: Option<serde_json::Value>,
721 #[serde(skip_serializing_if = "Option::is_none")]
723 pub model: Option<String>,
724 #[serde(skip_serializing_if = "Option::is_none")]
726 pub instructions_search: Option<String>,
727 #[serde(skip_serializing_if = "Option::is_none")]
729 pub created_after: Option<u64>,
730 #[serde(skip_serializing_if = "Option::is_none")]
732 pub created_before: Option<u64>,
733 #[serde(skip_serializing_if = "Option::is_none")]
735 pub reasoning_effort: Option<ReasoningEffort>,
736 #[serde(skip_serializing_if = "Option::is_none")]
738 pub temperature: Option<f64>,
739 #[serde(skip_serializing_if = "Option::is_none")]
741 pub top_p: Option<f64>,
742 #[serde(skip_serializing_if = "Option::is_none")]
744 pub users: Option<Vec<String>>,
745 #[serde(skip_serializing_if = "Option::is_none")]
747 pub tools: Option<Vec<String>>,
748}
749
750#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
752pub struct EvalRunList {
753 pub object: String,
755 pub data: Vec<EvalRun>,
757 pub first_id: String,
759 pub last_id: String,
761 pub has_more: bool,
763}
764
765#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
767#[builder(name = "CreateEvalRunRequestArgs")]
768#[builder(pattern = "mutable")]
769#[builder(setter(into, strip_option), default)]
770#[builder(derive(Debug))]
771#[builder(build_fn(error = "OpenAIError"))]
772pub struct CreateEvalRunRequest {
773 #[serde(skip_serializing_if = "Option::is_none")]
775 pub name: Option<String>,
776 pub data_source: CreateEvalRunDataSource,
778 #[serde(skip_serializing_if = "Option::is_none")]
780 pub metadata: Option<Metadata>,
781}
782
783#[derive(Debug, Serialize, Clone, PartialEq)]
785#[serde(tag = "type", rename_all = "snake_case")]
786pub enum CreateEvalRunDataSource {
787 Jsonl(CreateEvalJsonlRunDataSource),
789 Completions(CreateEvalCompletionsRunDataSource),
791 Responses(CreateEvalResponsesRunDataSource),
793}
794
795impl Default for CreateEvalRunDataSource {
797 fn default() -> Self {
798 todo!()
799 }
800}
801
802#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
804pub struct DeleteEvalRunResponse {
805 pub object: String,
807 pub deleted: bool,
809 pub run_id: String,
811}
812
813#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
817pub struct EvalRunOutputItem {
818 pub object: String,
820 pub id: String,
822 pub run_id: String,
824 pub eval_id: String,
826 pub created_at: i64,
828 pub status: String,
830 pub datasource_item_id: u64,
832 pub datasource_item: serde_json::Value,
834 pub results: Vec<EvalRunOutputItemResult>,
836 pub sample: EvalRunOutputItemSample,
838}
839
840#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
842pub struct EvalRunOutputItemResult {
843 pub name: String,
845 pub score: f64,
847 pub passed: bool,
849 #[serde(skip_serializing_if = "Option::is_none")]
851 pub sample: Option<serde_json::Value>,
852}
853
854#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
855pub struct SimpleOutputMessage {
856 pub role: String,
857 pub content: String,
858}
859
860#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
862pub struct EvalRunOutputItemSample {
863 pub input: Vec<SimpleInputMessage>,
865 pub output: Vec<SimpleOutputMessage>,
867 pub finish_reason: String,
869 pub model: String,
871 pub usage: EvalRunOutputItemUsage,
873 pub error: Option<EvalApiError>,
875 pub temperature: f64,
877 pub max_completion_tokens: i32,
879 pub top_p: f64,
881 pub seed: i32,
883}
884
885#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
887pub struct EvalRunOutputItemUsage {
888 pub total_tokens: i32,
890 pub completion_tokens: i32,
892 pub prompt_tokens: i32,
894 pub cached_tokens: i32,
896}
897
898#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
900pub struct EvalRunOutputItemList {
901 pub object: String,
903 pub data: Vec<EvalRunOutputItem>,
905 pub first_id: String,
907 pub last_id: String,
909 pub has_more: bool,
911}
912
913#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
915pub struct EvalApiError {
916 pub code: String,
918 pub message: String,
920}