1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3
4use crate::error::OpenAIError;
5use crate::types::chat::{ChatCompletionTool, ImageDetail, InputAudio, ResponseFormat};
6use crate::types::graders::{
7 GraderLabelModel, GraderPython, GraderScoreModel, GraderStringCheck, GraderTextSimilarity,
8};
9use crate::types::responses::{ResponseTextParam, Tool};
10use crate::types::Metadata;
11
12pub use crate::types::responses::{EasyInputMessage, InputTextContent, ReasoningEffort};
14
15#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
22pub struct Eval {
23 pub object: String,
25 pub id: String,
27 pub name: String,
29 pub data_source_config: EvalDataSourceConfig,
31 pub testing_criteria: Vec<EvalTestingCriterion>,
33 pub created_at: u64,
35 pub metadata: Metadata,
36}
37
38#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
40#[serde(tag = "type", rename_all = "snake_case")]
41pub enum EvalDataSourceConfig {
42 Custom(EvalCustomDataSourceConfig),
44 Logs(EvalLogsDataSourceConfig),
46 #[serde(rename = "stored_completions")]
48 StoredCompletions(EvalStoredCompletionsDataSourceConfig),
49}
50
51#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
53pub struct EvalCustomDataSourceConfig {
54 #[serde(rename = "type")]
56 pub r#type: String,
57 pub schema: serde_json::Value,
59}
60
61#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
63pub struct EvalLogsDataSourceConfig {
64 #[serde(rename = "type")]
66 pub r#type: String,
67 #[serde(skip_serializing_if = "Option::is_none")]
69 pub metadata: Option<Metadata>,
70 pub schema: serde_json::Value,
72}
73
74#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
76pub struct EvalStoredCompletionsDataSourceConfig {
77 #[serde(rename = "type")]
79 pub r#type: String,
80 #[serde(skip_serializing_if = "Option::is_none")]
82 pub metadata: Option<Metadata>,
83 pub schema: serde_json::Value,
85}
86
87#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
89#[serde(tag = "type", rename_all = "snake_case")]
90pub enum EvalTestingCriterion {
91 LabelModel(EvalGraderLabelModel),
93 StringCheck(EvalGraderStringCheck),
95 TextSimilarity(EvalGraderTextSimilarity),
97 Python(EvalGraderPython),
99 ScoreModel(EvalGraderScoreModel),
101}
102
103#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
105#[serde(transparent)]
106pub struct EvalGraderLabelModel(pub GraderLabelModel);
107
108#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
110#[serde(transparent)]
111pub struct EvalGraderStringCheck(pub GraderStringCheck);
112
113#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
115pub struct EvalGraderTextSimilarity {
116 #[serde(flatten)]
117 pub grader: GraderTextSimilarity,
118 pub pass_threshold: f64,
119}
120
121#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
123#[serde(rename_all = "snake_case")]
124pub enum TextSimilarityMetric {
125 Cosine,
127 FuzzyMatch,
129 Bleu,
131 Gleu,
133 Meteor,
135 Rouge1,
137 Rouge2,
139 Rouge3,
141 Rouge4,
143 Rouge5,
145 RougeL,
147}
148
149#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
152pub struct EvalGraderPython {
153 #[serde(flatten)]
154 pub grader: GraderPython,
155 pub pass_threshold: Option<f64>,
156}
157
158#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
159pub struct SamplingParams {
160 #[serde(skip_serializing_if = "Option::is_none")]
162 pub seed: Option<i32>,
163 #[serde(skip_serializing_if = "Option::is_none")]
165 pub top_p: Option<f64>,
166 #[serde(skip_serializing_if = "Option::is_none")]
168 pub temperature: Option<f64>,
169 #[serde(skip_serializing_if = "Option::is_none")]
171 pub max_completion_tokens: Option<i32>,
172 #[serde(skip_serializing_if = "Option::is_none")]
174 pub reasoning_effort: Option<ReasoningEffort>,
175}
176
177#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
180pub struct EvalGraderScoreModel {
181 #[serde(flatten)]
182 pub grader: GraderScoreModel,
183 pub pass_threshold: Option<f64>,
185}
186
187#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
188pub struct EvalItem {
189 pub role: EvalItemRole,
192 pub content: EvalItemContent,
194}
195
196#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
198#[serde(rename_all = "lowercase")]
199pub enum EvalItemRole {
200 User,
202 Assistant,
204 System,
206 Developer,
208}
209
210#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
211pub struct OutputText {
212 pub text: String,
214}
215
216#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
217pub struct InputImage {
218 pub image_url: String,
220 #[serde(skip_serializing_if = "Option::is_none")]
223 pub detail: Option<ImageDetail>,
224}
225
226#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
228#[serde(tag = "type", rename_all = "snake_case")]
229pub enum EvalItemContent {
230 InputText(InputTextContent),
232 OutputText(OutputText),
234 InputImage(InputImage),
236 InputAudio(InputAudio),
238 Array(Vec<EvalItemContent>),
240 #[serde(untagged)]
241 Text(String),
243}
244
245#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
247pub struct EvalList {
248 pub object: String,
250 pub data: Vec<Eval>,
252 pub first_id: String,
254 pub last_id: String,
256 pub has_more: bool,
258}
259
260#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
261#[builder(name = "CreateEvalRequestArgs")]
262#[builder(pattern = "mutable")]
263#[builder(setter(into, strip_option), default)]
264#[builder(derive(Debug))]
265#[builder(build_fn(error = "OpenAIError"))]
266pub struct CreateEvalRequest {
267 pub name: Option<String>,
269 pub data_source_config: CreateEvalDataSourceConfig,
272 pub testing_criteria: Vec<CreateEvalTestingCriterion>,
276 #[serde(skip_serializing_if = "Option::is_none")]
277 pub metadata: Option<Metadata>,
278}
279
280#[derive(Debug, Serialize, Clone, PartialEq)]
281#[serde(tag = "type", rename_all = "snake_case")]
282pub enum CreateEvalDataSourceConfig {
283 Custom(CreateEvalCustomDataSourceConfig),
288 Logs(CreateEvalLogsDataSourceConfig),
291}
292
293impl Default for CreateEvalDataSourceConfig {
294 fn default() -> Self {
295 Self::Custom(CreateEvalCustomDataSourceConfig::default())
296 }
297}
298
299#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
300#[builder(name = "CreateEvalCustomDataSourceConfigArgs")]
301#[builder(pattern = "mutable")]
302#[builder(setter(into, strip_option), default)]
303#[builder(derive(Debug))]
304#[builder(build_fn(error = "OpenAIError"))]
305pub struct CreateEvalCustomDataSourceConfig {
306 pub item_schema: serde_json::Value,
308 #[serde(skip_serializing_if = "Option::is_none")]
311 pub include_sample_schema: Option<bool>,
312}
313
314#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
316#[builder(name = "CreateEvalLogsDataSourceConfigArgs")]
317#[builder(pattern = "mutable")]
318#[builder(setter(into, strip_option), default)]
319#[builder(derive(Debug))]
320#[builder(build_fn(error = "OpenAIError"))]
321pub struct CreateEvalLogsDataSourceConfig {
322 #[serde(skip_serializing_if = "Option::is_none")]
324 pub metadata: Option<Metadata>,
325}
326
327#[derive(Debug, Serialize, Clone, PartialEq)]
328#[serde(tag = "type", rename_all = "snake_case")]
329pub enum CreateEvalTestingCriterion {
330 LabelModel(CreateEvalLabelModelGrader),
333 StringCheck(EvalGraderStringCheck),
336 TextSimilarity(EvalGraderTextSimilarity),
338 Python(EvalGraderPython),
340 ScoreModel(EvalGraderScoreModel),
342}
343
344#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
346#[builder(name = "CreateEvalLabelModelGraderArgs")]
347#[builder(pattern = "mutable")]
348#[builder(setter(into, strip_option), default)]
349#[builder(derive(Debug))]
350#[builder(build_fn(error = "OpenAIError"))]
351pub struct CreateEvalLabelModelGrader {
352 pub name: String,
354 pub model: String,
356 pub input: Vec<CreateEvalItem>,
359 pub labels: Vec<String>,
361 pub passing_labels: Vec<String>,
363}
364
365#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
366pub struct SimpleInputMessage {
367 pub role: String,
369 pub content: String,
371}
372
373#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
375#[serde(tag = "type", rename_all = "snake_case")]
376pub enum CreateEvalItem {
377 Message(EvalItem),
383
384 #[serde(untagged)]
386 Simple(SimpleInputMessage),
387}
388
389#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
391#[builder(name = "UpdateEvalRequestArgs")]
392#[builder(pattern = "mutable")]
393#[builder(setter(into, strip_option), default)]
394#[builder(derive(Debug))]
395#[builder(build_fn(error = "OpenAIError"))]
396pub struct UpdateEvalRequest {
397 #[serde(skip_serializing_if = "Option::is_none")]
399 pub name: Option<String>,
400 #[serde(skip_serializing_if = "Option::is_none")]
402 pub metadata: Option<Metadata>,
403}
404
405#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
407pub struct DeleteEvalResponse {
408 pub object: String,
410 pub deleted: bool,
412 pub eval_id: String,
414}
415
416#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
420pub struct EvalRun {
421 pub object: String,
423 pub id: String,
425 pub eval_id: String,
427 pub status: EvalRunStatus,
429 pub model: String,
431 pub name: String,
433 pub created_at: u64,
435 pub report_url: String,
437 pub result_counts: EvalRunResultCounts,
439 pub per_model_usage: Option<Vec<EvalRunModelUsage>>,
441 pub per_testing_criteria_results: Option<Vec<EvalRunTestingCriteriaResult>>,
443 pub data_source: EvalRunDataSource,
445 pub metadata: Metadata,
447 pub error: Option<EvalApiError>,
449}
450
451#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
453#[serde(rename_all = "snake_case")]
454pub enum EvalRunStatus {
455 Queued,
457 InProgress,
459 Completed,
461 Failed,
463 Canceled,
465}
466
467#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
469pub struct EvalRunResultCounts {
470 pub total: u32,
472 pub errored: u32,
474 pub failed: u32,
476 pub passed: u32,
478}
479
480#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
482pub struct EvalRunModelUsage {
483 pub model_name: String,
485 pub invocation_count: u32,
487 pub prompt_tokens: u32,
489 pub completion_tokens: u32,
491 pub total_tokens: u32,
493 pub cached_tokens: u32,
495}
496
497#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
499pub struct EvalRunTestingCriteriaResult {
500 pub testing_criteria: String,
502 pub passed: u32,
504 pub failed: u32,
506}
507
508#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
510#[serde(tag = "type", rename_all = "snake_case")]
511pub enum EvalRunDataSource {
512 Jsonl(CreateEvalJsonlRunDataSource),
514 Completions(CreateEvalCompletionsRunDataSource),
516 Responses(CreateEvalResponsesRunDataSource),
518}
519
520#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
522pub struct CreateEvalJsonlRunDataSource {
523 pub source: EvalJsonlSource,
525}
526
527#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
529#[serde(tag = "type", rename_all = "snake_case")]
530pub enum EvalJsonlSource {
531 FileContent(EvalJsonlFileContentSource),
533 FileId(EvalJsonlFileIdSource),
535}
536
537#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
539pub struct EvalJsonlFileContentSource {
540 pub content: Vec<EvalJsonlContentItem>,
542}
543
544#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
546pub struct EvalJsonlFileIdSource {
547 pub id: String,
549}
550
551#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
553pub struct EvalJsonlContentItem {
554 pub item: serde_json::Value,
556 #[serde(skip_serializing_if = "Option::is_none")]
558 pub sample: Option<serde_json::Value>,
559}
560
561#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
563pub struct CreateEvalCompletionsRunDataSource {
564 pub input_messages: EvalInputMessages,
568 #[serde(skip_serializing_if = "Option::is_none")]
570 pub sampling_params: Option<EvalSamplingParams>,
571 pub model: String,
573 pub source: EvalCompletionsSource,
575}
576
577#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
578pub struct TemplateInputMessages {
579 pub template: Vec<CreateEvalItem>,
582}
583
584#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
585pub struct ItemReference {
586 pub item_reference: String,
588}
589
590#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
592#[serde(tag = "type", rename_all = "snake_case")]
593pub enum EvalInputMessages {
594 Template(TemplateInputMessages),
596 ItemReference(ItemReference),
598}
599
600#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
602pub struct EvalSamplingParams {
603 #[serde(skip_serializing_if = "Option::is_none")]
605 pub seed: Option<i32>,
606 #[serde(skip_serializing_if = "Option::is_none")]
608 pub top_p: Option<f64>,
609 #[serde(skip_serializing_if = "Option::is_none")]
611 pub temperature: Option<f64>,
612 #[serde(skip_serializing_if = "Option::is_none")]
614 pub max_completion_tokens: Option<i32>,
615 #[serde(skip_serializing_if = "Option::is_none")]
617 pub reasoning_effort: Option<ReasoningEffort>,
618 #[serde(skip_serializing_if = "Option::is_none")]
620 pub response_format: Option<ResponseFormat>,
621 #[serde(skip_serializing_if = "Option::is_none")]
623 pub tools: Option<Vec<ChatCompletionTool>>,
624}
625
626#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
627pub struct EvalResponsesSamplingParams {
628 #[serde(skip_serializing_if = "Option::is_none")]
630 pub seed: Option<i32>,
631 #[serde(skip_serializing_if = "Option::is_none")]
633 pub top_p: Option<f64>,
634 #[serde(skip_serializing_if = "Option::is_none")]
636 pub temperature: Option<f64>,
637 #[serde(skip_serializing_if = "Option::is_none")]
639 pub max_completion_tokens: Option<u32>,
640 #[serde(skip_serializing_if = "Option::is_none")]
642 pub reasoning_effort: Option<ReasoningEffort>,
643 #[serde(skip_serializing_if = "Option::is_none")]
645 pub response_format: Option<ResponseFormat>,
646 #[serde(skip_serializing_if = "Option::is_none")]
648 pub tools: Option<Vec<Tool>>,
649 #[serde(skip_serializing_if = "Option::is_none")]
654 pub text: Option<ResponseTextParam>,
655}
656
657#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
659#[serde(tag = "type", rename_all = "snake_case")]
660pub enum EvalCompletionsSource {
661 FileContent(EvalJsonlFileContentSource),
663 FileId(EvalJsonlFileIdSource),
665 StoredCompletions(EvalStoredCompletionsSource),
667}
668
669#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
671pub struct EvalStoredCompletionsSource {
672 #[serde(skip_serializing_if = "Option::is_none")]
674 pub metadata: Option<Metadata>,
675 #[serde(skip_serializing_if = "Option::is_none")]
677 pub model: Option<String>,
678 #[serde(skip_serializing_if = "Option::is_none")]
680 pub created_after: Option<i64>,
681 #[serde(skip_serializing_if = "Option::is_none")]
683 pub created_before: Option<i64>,
684 #[serde(skip_serializing_if = "Option::is_none")]
686 pub limit: Option<i32>,
687}
688
689#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
691pub struct CreateEvalResponsesRunDataSource {
692 #[serde(skip_serializing_if = "Option::is_none")]
694 pub input_messages: Option<EvalInputMessages>,
695 #[serde(skip_serializing_if = "Option::is_none")]
697 pub sampling_params: Option<EvalResponsesSamplingParams>,
698 #[serde(skip_serializing_if = "Option::is_none")]
699 pub model: Option<String>,
700 pub source: EvalResponsesRunSource,
702}
703
704#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
706#[serde(tag = "type", rename_all = "snake_case")]
707pub enum EvalResponsesRunSource {
708 FileContent(EvalJsonlFileContentSource),
710 FileId(EvalJsonlFileIdSource),
712 Responses(EvalResponsesSource),
714}
715
716#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
718pub struct EvalResponsesSource {
719 #[serde(skip_serializing_if = "Option::is_none")]
721 pub metadata: Option<serde_json::Value>,
722 #[serde(skip_serializing_if = "Option::is_none")]
724 pub model: Option<String>,
725 #[serde(skip_serializing_if = "Option::is_none")]
727 pub instructions_search: Option<String>,
728 #[serde(skip_serializing_if = "Option::is_none")]
730 pub created_after: Option<u64>,
731 #[serde(skip_serializing_if = "Option::is_none")]
733 pub created_before: Option<u64>,
734 #[serde(skip_serializing_if = "Option::is_none")]
736 pub reasoning_effort: Option<ReasoningEffort>,
737 #[serde(skip_serializing_if = "Option::is_none")]
739 pub temperature: Option<f64>,
740 #[serde(skip_serializing_if = "Option::is_none")]
742 pub top_p: Option<f64>,
743 #[serde(skip_serializing_if = "Option::is_none")]
745 pub users: Option<Vec<String>>,
746 #[serde(skip_serializing_if = "Option::is_none")]
748 pub tools: Option<Vec<String>>,
749}
750
751#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
753pub struct EvalRunList {
754 pub object: String,
756 pub data: Vec<EvalRun>,
758 pub first_id: String,
760 pub last_id: String,
762 pub has_more: bool,
764}
765
766#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
768#[builder(name = "CreateEvalRunRequestArgs")]
769#[builder(pattern = "mutable")]
770#[builder(setter(into, strip_option), default)]
771#[builder(derive(Debug))]
772#[builder(build_fn(error = "OpenAIError"))]
773pub struct CreateEvalRunRequest {
774 #[serde(skip_serializing_if = "Option::is_none")]
776 pub name: Option<String>,
777 pub data_source: CreateEvalRunDataSource,
779 #[serde(skip_serializing_if = "Option::is_none")]
781 pub metadata: Option<Metadata>,
782}
783
784#[derive(Debug, Serialize, Clone, PartialEq)]
786#[serde(tag = "type", rename_all = "snake_case")]
787pub enum CreateEvalRunDataSource {
788 Jsonl(CreateEvalJsonlRunDataSource),
790 Completions(CreateEvalCompletionsRunDataSource),
792 Responses(CreateEvalResponsesRunDataSource),
794}
795
796impl Default for CreateEvalRunDataSource {
798 fn default() -> Self {
799 todo!()
800 }
801}
802
803#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
805pub struct DeleteEvalRunResponse {
806 pub object: String,
808 pub deleted: bool,
810 pub run_id: String,
812}
813
814#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
818pub struct EvalRunOutputItem {
819 pub object: String,
821 pub id: String,
823 pub run_id: String,
825 pub eval_id: String,
827 pub created_at: i64,
829 pub status: String,
831 pub datasource_item_id: u64,
833 pub datasource_item: serde_json::Value,
835 pub results: Vec<EvalRunOutputItemResult>,
837 pub sample: EvalRunOutputItemSample,
839}
840
841#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
843pub struct EvalRunOutputItemResult {
844 pub name: String,
846 pub score: f64,
848 pub passed: bool,
850 #[serde(skip_serializing_if = "Option::is_none")]
852 pub sample: Option<serde_json::Value>,
853}
854
855#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
856pub struct SimpleOutputMessage {
857 pub role: String,
858 pub content: String,
859}
860
861#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
863pub struct EvalRunOutputItemSample {
864 pub input: Vec<SimpleInputMessage>,
866 pub output: Vec<SimpleOutputMessage>,
868 pub finish_reason: String,
870 pub model: String,
872 pub usage: EvalRunOutputItemUsage,
874 pub error: Option<EvalApiError>,
876 pub temperature: f64,
878 pub max_completion_tokens: i32,
880 pub top_p: f64,
882 pub seed: i32,
884}
885
886#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
888pub struct EvalRunOutputItemUsage {
889 pub total_tokens: i32,
891 pub completion_tokens: i32,
893 pub prompt_tokens: i32,
895 pub cached_tokens: i32,
897}
898
899#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
901pub struct EvalRunOutputItemList {
902 pub object: String,
904 pub data: Vec<EvalRunOutputItem>,
906 pub first_id: String,
908 pub last_id: String,
910 pub has_more: bool,
912}
913
914#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
916pub struct EvalApiError {
917 pub code: String,
919 pub message: String,
921}