async-openai 0.34.0

Rust library for OpenAI
use derive_builder::Builder;
use serde::{Deserialize, Serialize};

use crate::error::OpenAIError;
use crate::types::chat::{ChatCompletionTool, ImageDetail, InputAudio, ResponseFormat};
use crate::types::graders::{
    GraderLabelModel, GraderPython, GraderScoreModel, GraderStringCheck, GraderTextSimilarity,
};
use crate::types::responses::{ResponseTextParam, Tool};
use crate::types::Metadata;

// Re-export commonly used types
pub use crate::types::responses::{EasyInputMessage, InputTextContent, ReasoningEffort};

/// An Eval object with a data source config and testing criteria.
/// An Eval represents a task to be done for your LLM integration. For example:
/// - Improve the quality of my chatbot
/// - See how well my chatbot handles customer support
/// - Check if o4-mini is better at my use case than gpt-4o
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct Eval {
    /// The object type, which is always "eval".
    pub object: String,
    /// Unique identifier for the evaluation.
    pub id: String,
    /// The name of the evaluation.
    pub name: String,
    /// Configuration of data sources used in runs of the evaluation.
    pub data_source_config: EvalDataSourceConfig,
    /// A list of testing criteria.
    pub testing_criteria: Vec<EvalTestingCriterion>,
    /// The Unix timestamp (in seconds) for when the eval was created.
    pub created_at: u64,
    /// Metadata attached to the eval.
    pub metadata: Metadata,
}

/// Configuration of data sources used in runs of the evaluation.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalDataSourceConfig {
    /// Custom data source config.
    Custom(EvalCustomDataSourceConfig),
    /// Logs data source config.
    Logs(EvalLogsDataSourceConfig),
    /// Stored completions data source config (deprecated).
    #[serde(rename = "stored_completions")]
    StoredCompletions(EvalStoredCompletionsDataSourceConfig),
}

/// Custom data source config.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalCustomDataSourceConfig {
    /// The type of data source. Always "custom".
    #[serde(rename = "type")]
    pub r#type: String,
    /// The json schema for the run data source items.
    pub schema: serde_json::Value,
}

/// Logs data source config.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalLogsDataSourceConfig {
    /// The type of data source. Always "logs".
    #[serde(rename = "type")]
    pub r#type: String,
    /// Metadata filters for the logs data source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// The json schema for the run data source items.
    pub schema: serde_json::Value,
}

/// Stored completions data source config (deprecated).
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalStoredCompletionsDataSourceConfig {
    /// The type of data source. Always "stored_completions".
    #[serde(rename = "type")]
    pub r#type: String,
    /// Metadata filters for the stored completions data source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// The json schema for the run data source items.
    pub schema: serde_json::Value,
}

/// A single testing criterion (grader) used by the eval.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalTestingCriterion {
    /// Label model grader.
    LabelModel(EvalGraderLabelModel),
    /// String check grader.
    StringCheck(EvalGraderStringCheck),
    /// Text similarity grader.
    TextSimilarity(EvalGraderTextSimilarity),
    /// Python grader.
    Python(EvalGraderPython),
    /// Score model grader.
    ScoreModel(EvalGraderScoreModel),
}

/// Label model grader.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(transparent)]
pub struct EvalGraderLabelModel(pub GraderLabelModel);

/// String check grader.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(transparent)]
pub struct EvalGraderStringCheck(pub GraderStringCheck);

/// Text similarity grader.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalGraderTextSimilarity {
    #[serde(flatten)]
    pub grader: GraderTextSimilarity,
    /// The threshold for the score.
    pub pass_threshold: f64,
}

/// Text similarity metric.
#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum TextSimilarityMetric {
    /// Cosine similarity.
    Cosine,
    /// Fuzzy match.
    FuzzyMatch,
    /// BLEU score.
    Bleu,
    /// GLEU score.
    Gleu,
    /// METEOR score.
    Meteor,
    /// ROUGE-1.
    Rouge1,
    /// ROUGE-2.
    Rouge2,
    /// ROUGE-3.
    Rouge3,
    /// ROUGE-4.
    Rouge4,
    /// ROUGE-5.
    Rouge5,
    /// ROUGE-L.
    RougeL,
}

/// Python grader.
/// Mirrors `GraderPython` from the OpenAPI spec, with an optional pass threshold.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalGraderPython {
    #[serde(flatten)]
    pub grader: GraderPython,
    /// The threshold for the score.
    pub pass_threshold: Option<f64>,
}

/// Sampling parameters for a model-based grader.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct SamplingParams {
    /// A seed value to initialize the randomness during sampling.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
    /// An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// A higher temperature increases randomness in the outputs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// The maximum number of tokens the grader model may generate in its response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<i32>,
    /// Optional reasoning effort parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
}

/// Score model grader.
/// Mirrors `GraderScoreModel` from the OpenAPI spec, with an optional pass threshold.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalGraderScoreModel {
    #[serde(flatten)]
    pub grader: GraderScoreModel,
    /// The threshold for the score.
    pub pass_threshold: Option<f64>,
}

/// A message input to the model, with a role indicating instruction-following hierarchy.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalItem {
    /// The role of the message input. One of `user`, `assistant`, `system`, or
    /// `developer`.
    pub role: EvalItemRole,
    /// Inputs to the model - can contain template strings. Supports text, output text, input images, and
    /// input audio, either as a single item or an array of items.
    pub content: EvalItemContent,
}

/// The role of the message input.
#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum EvalItemRole {
    /// User role.
    User,
    /// Assistant role.
    Assistant,
    /// System role.
    System,
    /// Developer role.
    Developer,
}

/// Output text from the model.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalItemContentOutputText {
    /// The text output from the model.
    pub text: String,
}

/// Input image block used within EvalItem content arrays.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalItemInputImage {
    /// The URL of the image input.
    pub image_url: String,
    /// The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`.
    /// Defaults to `auto`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detail: Option<ImageDetail>,
}

/// Inputs to the model - can contain template strings.
/// Supports text, output text, input images, and input audio, either as a single item or an array of items.
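///
/// # Example
///
/// A construction sketch with illustrative values; the
/// `async_openai::types::evals` import path is an assumption here.
///
/// ```
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{
///     EvalItemContent, EvalItemContentItem, EvalItemContentOutputText,
/// };
///
/// // A single plain-text input item...
/// let single = EvalItemContent::Single(EvalItemContentItem::Text(
///     "What is the capital of France?".to_string(),
/// ));
///
/// // ...or an array mixing input text and model output text.
/// let array = EvalItemContent::Array(vec![
///     EvalItemContentItem::Text("What is the capital of France?".to_string()),
///     EvalItemContentItem::OutputText(EvalItemContentOutputText {
///         text: "Paris".to_string(),
///     }),
/// ]);
/// assert!(matches!(single, EvalItemContent::Single(_)));
/// assert!(matches!(array, EvalItemContent::Array(_)));
/// ```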
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum EvalItemContent {
    /// An array of content items (input text, output text, input image, or input audio).
    Array(Vec<EvalItemContentItem>),
    /// A single content item.
    Single(EvalItemContentItem),
}

/// A single content item: input text, output text, input image, or input audio.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalItemContentItem {
    /// An input text content object with type field.
    InputText(InputTextContent),
    /// An output text from the model.
    OutputText(EvalItemContentOutputText),
    /// An image input to the model.
    InputImage(EvalItemInputImage),
    /// An audio input to the model.
    InputAudio(InputAudio),
    /// A text input to the model (plain string).
    #[serde(untagged)]
    Text(String),
}

/// List of evals.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalList {
    /// The object type, which is always "list".
    pub object: String,
    /// An array of eval objects.
    pub data: Vec<Eval>,
    /// The identifier of the first eval in the data array.
    pub first_id: Option<String>,
    /// The identifier of the last eval in the data array.
    pub last_id: Option<String>,
    /// Indicates whether there are more evals available.
    pub has_more: bool,
}

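/// Request to create an eval with a data source config and testing criteria.
///
/// # Example
///
/// A minimal sketch using the generated builders; the
/// `async_openai::types::evals` import path, model name, and schema below are
/// illustrative assumptions, not values from the upstream API docs.
///
/// ```
/// use async_openai::error::OpenAIError;
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{
///     CreateEvalCustomDataSourceConfigArgs, CreateEvalDataSourceConfig, CreateEvalItem,
///     CreateEvalLabelModelGraderArgs, CreateEvalRequest, CreateEvalRequestArgs,
///     CreateEvalTestingCriterion, SimpleInputMessage,
/// };
///
/// fn build_request() -> Result<CreateEvalRequest, OpenAIError> {
///     // Shape of each row in the data source (illustrative JSON schema).
///     let data_source_config = CreateEvalDataSourceConfig::Custom(
///         CreateEvalCustomDataSourceConfigArgs::default()
///             .item_schema(serde_json::json!({
///                 "type": "object",
///                 "properties": { "question": { "type": "string" } },
///                 "required": ["question"]
///             }))
///             .include_sample_schema(true)
///             .build()?,
///     );
///
///     // A label-model grader referencing `{{item.question}}` and `{{sample.output_text}}`.
///     let grader = CreateEvalLabelModelGraderArgs::default()
///         .name("relevance")
///         .model("gpt-4o-mini")
///         .input(vec![CreateEvalItem::Simple(SimpleInputMessage {
///             role: "user".to_string(),
///             content: "Q: {{item.question}}\nA: {{sample.output_text}}".to_string(),
///         })])
///         .labels(vec!["relevant".to_string(), "irrelevant".to_string()])
///         .passing_labels(vec!["relevant".to_string()])
///         .build()?;
///
///     CreateEvalRequestArgs::default()
///         .name("chatbot relevance")
///         .data_source_config(data_source_config)
///         .testing_criteria(vec![CreateEvalTestingCriterion::LabelModel(grader)])
///         .build()
/// }
/// # build_request().unwrap();
/// ```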
#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
#[builder(name = "CreateEvalRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateEvalRequest {
    /// The name of the evaluation.
    pub name: Option<String>,
    /// The configuration for the data source used for the evaluation runs.
    /// Dictates the schema of the data used in the evaluation.
    pub data_source_config: CreateEvalDataSourceConfig,
    /// A list of graders for all eval runs in this group. Graders can reference variables in the data
    /// source using double curly braces notation, like `{{item.variable_name}}`. To reference the model's
    /// output, use the `sample` namespace (i.e., `{{sample.output_text}}`).
    pub testing_criteria: Vec<CreateEvalTestingCriterion>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
}

/// The configuration for the data source used for the evaluation runs.
#[derive(Debug, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CreateEvalDataSourceConfig {
    /// A CustomDataSourceConfig object that defines the schema for the data source used for the evaluation
    /// runs. This schema defines the shape of the data that will be:
    /// - used to define your testing criteria, and
    /// - required when creating a run.
    Custom(CreateEvalCustomDataSourceConfig),
    /// A data source config which specifies the metadata property of your logs query.
    /// This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
    Logs(CreateEvalLogsDataSourceConfig),
}

impl Default for CreateEvalDataSourceConfig {
    fn default() -> Self {
        Self::Custom(CreateEvalCustomDataSourceConfig::default())
    }
}

#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
#[builder(name = "CreateEvalCustomDataSourceConfigArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateEvalCustomDataSourceConfig {
    /// The json schema for each row in the data source.
    pub item_schema: serde_json::Value,
    /// Whether the eval should expect you to populate the sample namespace (i.e., by generating responses
    /// off of your data source).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_sample_schema: Option<bool>,
}

/// Logs data source config for creating an eval.
#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
#[builder(name = "CreateEvalLogsDataSourceConfigArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateEvalLogsDataSourceConfig {
    /// Metadata filters for the logs data source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
}

/// A grader used as a testing criterion when creating an eval.
#[derive(Debug, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CreateEvalTestingCriterion {
    /// A LabelModelGrader object which uses a model to assign labels to each item
    /// in the evaluation.
    LabelModel(CreateEvalLabelModelGrader),
    /// A StringCheckGrader object that performs a string comparison between input and reference using a
    /// specified operation.
    StringCheck(EvalGraderStringCheck),
    /// Text similarity grader.
    TextSimilarity(EvalGraderTextSimilarity),
    /// Python grader.
    Python(EvalGraderPython),
    /// Score model grader.
    ScoreModel(EvalGraderScoreModel),
}

/// Label model grader for creating an eval.
#[derive(Debug, Serialize, Clone, PartialEq, Builder, Default)]
#[builder(name = "CreateEvalLabelModelGraderArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateEvalLabelModelGrader {
    /// The name of the grader.
    pub name: String,
    /// The model to use for the evaluation. Must support structured outputs.
    pub model: String,
    /// A list of chat messages forming the prompt or context. May include variable references to the
    /// `item` namespace, e.g. `{{item.name}}`.
    pub input: Vec<CreateEvalItem>,
    /// The labels to assign to each item in the evaluation.
    pub labels: Vec<String>,
    /// The labels that indicate a passing result. Must be a subset of `labels`.
    pub passing_labels: Vec<String>,
}

/// A simple chat message with a plain-text role and content.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SimpleInputMessage {
    /// The role of the message.
    pub role: String,
    /// The content of the message.
    pub content: String,
}

/// A chat message that makes up the prompt or context.
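///
/// # Example
///
/// A construction sketch showing both variants; the
/// `async_openai::types::evals` import path is an assumption here.
///
/// ```
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{
///     CreateEvalItem, EvalItem, EvalItemContent, EvalItemContentItem, EvalItemRole,
///     SimpleInputMessage,
/// };
///
/// // The simple form: plain-text role and content.
/// let simple = CreateEvalItem::Simple(SimpleInputMessage {
///     role: "user".to_string(),
///     content: "Classify: {{item.text}}".to_string(),
/// });
///
/// // The full message form with a typed role and structured content.
/// let message = CreateEvalItem::Message(EvalItem {
///     role: EvalItemRole::Developer,
///     content: EvalItemContent::Single(EvalItemContentItem::Text(
///         "You are a strict grader.".to_string(),
///     )),
/// });
/// assert!(matches!(simple, CreateEvalItem::Simple(_)));
/// assert!(matches!(message, CreateEvalItem::Message(_)));
/// ```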
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CreateEvalItem {
    /// A message input to the model with a role indicating instruction following
    /// hierarchy. Instructions given with the `developer` or `system` role take
    /// precedence over instructions given with the `user` role. Messages with the
    /// `assistant` role are presumed to have been generated by the model in previous
    /// interactions.
    Message(EvalItem),

    /// A simple message given as a plain role and content string.
    #[serde(untagged)]
    Simple(SimpleInputMessage),
}

/// Request to update an eval.
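///
/// # Example
///
/// A minimal sketch using the generated builder; the
/// `async_openai::types::evals` import path is an assumption here.
///
/// ```
/// use async_openai::error::OpenAIError;
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{UpdateEvalRequest, UpdateEvalRequestArgs};
///
/// fn rename_eval() -> Result<UpdateEvalRequest, OpenAIError> {
///     // Only the fields that are set get serialized; here the eval is just renamed.
///     UpdateEvalRequestArgs::default()
///         .name("chatbot relevance v2")
///         .build()
/// }
/// # rename_eval().unwrap();
/// ```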
#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
#[builder(name = "UpdateEvalRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct UpdateEvalRequest {
    /// Rename the evaluation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// Metadata attached to the eval.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
}

/// Response from deleting an eval.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct DeleteEvalResponse {
    /// The object type, which is always "eval.deleted".
    pub object: String,
    /// Whether the eval was deleted.
    pub deleted: bool,
    /// The ID of the deleted eval.
    pub eval_id: String,
}

// EvalRun types

/// A schema representing an evaluation run.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRun {
    /// The object type, which is always "eval.run".
    pub object: String,
    /// Unique identifier for the evaluation run.
    pub id: String,
    /// The identifier of the associated evaluation.
    pub eval_id: String,
    /// The status of the evaluation run.
    pub status: EvalRunStatus,
    /// The model that is evaluated, if applicable.
    pub model: String,
    /// The name of the evaluation run.
    pub name: String,
    /// Unix timestamp (in seconds) when the evaluation run was created.
    pub created_at: u64,
    /// The URL to the rendered evaluation run report on the UI dashboard.
    pub report_url: String,
    /// Counters summarizing the outcomes of the evaluation run.
    pub result_counts: EvalRunResultCounts,
    /// Usage statistics for each model during the evaluation run.
    pub per_model_usage: Option<Vec<EvalRunModelUsage>>,
    /// Results per testing criteria applied during the evaluation run.
    pub per_testing_criteria_results: Option<Vec<EvalRunTestingCriteriaResult>>,
    /// Information about the run's data source.
    pub data_source: EvalRunDataSource,
    /// Metadata attached to the run.
    pub metadata: Metadata,
    /// Error information, if any.
    pub error: Option<EvalApiError>,
}

/// Status of an evaluation run.
#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum EvalRunStatus {
    /// Queued.
    Queued,
    /// In progress.
    InProgress,
    /// Completed.
    Completed,
    /// Failed.
    Failed,
    /// Canceled.
    Canceled,
}

/// Counters summarizing the outcomes of the evaluation run.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunResultCounts {
    /// Total number of executed output items.
    pub total: u32,
    /// Number of output items that resulted in an error.
    pub errored: u32,
    /// Number of output items that failed to pass the evaluation.
    pub failed: u32,
    /// Number of output items that passed the evaluation.
    pub passed: u32,
}

/// Usage statistics for each model during the evaluation run.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunModelUsage {
    /// The name of the model.
    pub model_name: String,
    /// The number of invocations.
    pub invocation_count: u32,
    /// The number of prompt tokens used.
    pub prompt_tokens: u32,
    /// The number of completion tokens generated.
    pub completion_tokens: u32,
    /// The total number of tokens used.
    pub total_tokens: u32,
    /// The number of tokens retrieved from cache.
    pub cached_tokens: u32,
}

/// Results per testing criteria applied during the evaluation run.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunTestingCriteriaResult {
    /// A description of the testing criteria.
    pub testing_criteria: String,
    /// Number of tests passed for this criteria.
    pub passed: u32,
    /// Number of tests failed for this criteria.
    pub failed: u32,
}

/// Information about the run's data source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalRunDataSource {
    /// A JsonlRunDataSource object that specifies a JSONL file matching the eval.
    Jsonl(CreateEvalJsonlRunDataSource),
    /// A CompletionsRunDataSource object describing a model sampling configuration.
    Completions(CreateEvalCompletionsRunDataSource),
    /// A ResponsesRunDataSource object describing a model sampling configuration.
    Responses(CreateEvalResponsesRunDataSource),
}

/// JSONL run data source.
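///
/// # Example
///
/// A construction sketch with inline file content (illustrative values); the
/// `async_openai::types::evals` import path is an assumption here.
///
/// ```
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{
///     CreateEvalJsonlRunDataSource, EvalJsonlContentItem, EvalJsonlFileContentSource,
///     EvalJsonlSource,
/// };
///
/// // Each content item populates the `item` namespace for one row.
/// let data_source = CreateEvalJsonlRunDataSource {
///     source: EvalJsonlSource::FileContent(EvalJsonlFileContentSource {
///         content: vec![EvalJsonlContentItem {
///             item: serde_json::json!({ "question": "What is 2 + 2?" }),
///             sample: Some(serde_json::json!({ "output_text": "4" })),
///         }],
///     }),
/// };
/// assert!(matches!(data_source.source, EvalJsonlSource::FileContent(_)));
/// ```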
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct CreateEvalJsonlRunDataSource {
    /// Determines what populates the `item` namespace in the data source.
    pub source: EvalJsonlSource,
}

/// JSONL source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalJsonlSource {
    /// File content source.
    FileContent(EvalJsonlFileContentSource),
    /// File ID source.
    FileId(EvalJsonlFileIdSource),
}

/// JSONL file content source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalJsonlFileContentSource {
    /// The content of the jsonl file.
    pub content: Vec<EvalJsonlContentItem>,
}

/// JSONL file ID source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalJsonlFileIdSource {
    /// The identifier of the file.
    pub id: String,
}

/// JSONL content item.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalJsonlContentItem {
    /// The item data.
    pub item: serde_json::Value,
    /// The sample data, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sample: Option<serde_json::Value>,
}

/// Completions run data source.
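///
/// # Example
///
/// A construction sketch that samples a model over stored completions; the
/// `async_openai::types::evals` import path and the model name are assumptions
/// here.
///
/// ```
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{
///     CreateEvalCompletionsRunDataSource, CreateEvalItem, EvalCompletionsSource,
///     EvalInputMessages, EvalSamplingParams, EvalStoredCompletionsSource,
///     SimpleInputMessage, TemplateInputMessages,
/// };
///
/// let data_source = CreateEvalCompletionsRunDataSource {
///     // A message template with a variable reference into the `item` namespace.
///     input_messages: EvalInputMessages::Template(TemplateInputMessages {
///         template: vec![CreateEvalItem::Simple(SimpleInputMessage {
///             role: "user".to_string(),
///             content: "{{item.question}}".to_string(),
///         })],
///     }),
///     sampling_params: Some(EvalSamplingParams {
///         temperature: Some(0.2),
///         ..Default::default()
///     }),
///     model: "gpt-4o-mini".to_string(),
///     // Populate the `item` namespace from stored completions filtered by model.
///     source: EvalCompletionsSource::StoredCompletions(EvalStoredCompletionsSource {
///         metadata: None,
///         model: Some("gpt-4o-mini".to_string()),
///         created_after: None,
///         created_before: None,
///         limit: Some(100),
///     }),
/// };
/// assert_eq!(data_source.model, "gpt-4o-mini");
/// ```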
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct CreateEvalCompletionsRunDataSource {
    /// Used when sampling from a model. Dictates the structure of the messages passed into the model. Can
    /// either be a reference to a prebuilt trajectory (i.e., `item.input_trajectory`), or a template with
    /// variable references to the `item` namespace.
    pub input_messages: EvalInputMessages,
    /// The sampling parameters for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sampling_params: Option<EvalSamplingParams>,
    /// The name of the model to use for generating completions (e.g. "o3-mini").
    pub model: String,
    /// Determines what populates the `item` namespace in this run's data source.
    pub source: EvalCompletionsSource,
}

/// Template input messages forming the prompt or context.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct TemplateInputMessages {
    /// A list of chat messages forming the prompt or context. May include variable references to
    /// the `item` namespace, e.g. `{{item.name}}`.
    pub template: Vec<CreateEvalItem>,
}

/// A reference to a prebuilt trajectory in the `item` namespace.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ItemReference {
    /// A reference to a variable in the `item` namespace, e.g. `"item.input_trajectory"`.
    pub item_reference: String,
}

/// Input messages for completions.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalInputMessages {
    /// Template input messages.
    Template(TemplateInputMessages),
    /// Item reference input messages.
    ItemReference(ItemReference),
}

/// Sampling parameters for the model.
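///
/// # Example
///
/// A sketch overriding a couple of fields and leaving the rest unset; the
/// struct implements `Default`, and unset fields are skipped during
/// serialization. The `async_openai::types::evals` import path is an
/// assumption here.
///
/// ```
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::EvalSamplingParams;
///
/// let params = EvalSamplingParams {
///     temperature: Some(0.2),
///     max_completion_tokens: Some(256),
///     ..Default::default()
/// };
/// assert_eq!(params.temperature, Some(0.2));
/// ```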
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
pub struct EvalSamplingParams {
    /// A seed value to initialize the randomness during sampling.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
    /// An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// A higher temperature increases randomness in the outputs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// The maximum number of tokens in the generated output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<i32>,
    /// Optional reasoning effort parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,
    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<ChatCompletionTool>>,
}

/// Sampling parameters for the model in a responses run data source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
pub struct EvalResponsesSamplingParams {
    /// A seed value to initialize the randomness during sampling.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
    /// An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// A higher temperature increases randomness in the outputs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// The maximum number of tokens in the generated output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<u32>,
    /// Optional reasoning effort parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,
    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,
    /// Configuration options for a text response from the model. Can be plain
    /// text or structured JSON data. Learn more:
    /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
    /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<ResponseTextParam>,
}

/// Completions source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalCompletionsSource {
    /// File content source.
    FileContent(EvalJsonlFileContentSource),
    /// File ID source.
    FileId(EvalJsonlFileIdSource),
    /// Stored completions source.
    StoredCompletions(EvalStoredCompletionsSource),
}

/// Stored completions source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalStoredCompletionsSource {
    /// Metadata filters for the stored completions.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// An optional model to filter by.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// An optional Unix timestamp to filter items created after this time.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_after: Option<u64>,
    /// An optional Unix timestamp to filter items created before this time.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_before: Option<u64>,
    /// An optional maximum number of items to return.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub limit: Option<i32>,
}

/// Responses run data source.
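///
/// # Example
///
/// A construction sketch reading items from an uploaded JSONL file; the
/// `async_openai::types::evals` import path, model name, and file id are
/// assumptions here.
///
/// ```
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{
///     CreateEvalResponsesRunDataSource, EvalJsonlFileIdSource, EvalResponsesRunSource,
/// };
///
/// let data_source = CreateEvalResponsesRunDataSource {
///     // Optional fields may be left unset.
///     input_messages: None,
///     sampling_params: None,
///     model: Some("gpt-4o-mini".to_string()),
///     source: EvalResponsesRunSource::FileId(EvalJsonlFileIdSource {
///         id: "file-abc123".to_string(),
///     }),
/// };
/// assert!(data_source.input_messages.is_none());
/// ```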
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct CreateEvalResponsesRunDataSource {
    /// Used when sampling from a model. Dictates the structure of the messages passed into the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_messages: Option<EvalInputMessages>,
    /// The sampling parameters for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sampling_params: Option<EvalResponsesSamplingParams>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// Determines what populates the `item` namespace in this run's data source.
    pub source: EvalResponsesRunSource,
}

/// Responses source.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EvalResponsesRunSource {
    /// File content source.
    FileContent(EvalJsonlFileContentSource),
    /// File ID source.
    FileId(EvalJsonlFileIdSource),
    /// An EvalResponsesSource object describing a run data source configuration.
    Responses(EvalResponsesSource),
}

/// An EvalResponsesSource object describing a run data source configuration.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalResponsesSource {
    /// Metadata filter for the responses. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,
    /// The name of the model to find responses for. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// Optional string to search the 'instructions' field. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions_search: Option<String>,
    /// Only include items created after this timestamp (inclusive). This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_after: Option<u64>,
    /// Only include items created before this timestamp (inclusive). This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_before: Option<u64>,
    /// Optional reasoning effort parameter. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
    /// Sampling temperature. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// Nucleus sampling parameter. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// List of user identifiers. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub users: Option<Vec<String>>,
    /// List of tool names. This is a query parameter used to select responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<String>>,
}

/// List of eval runs.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunList {
    /// The object type, which is always "list".
    pub object: String,
    /// An array of eval run objects.
    pub data: Vec<EvalRun>,
    /// The identifier of the first eval run in the data array.
    pub first_id: Option<String>,
    /// The identifier of the last eval run in the data array.
    pub last_id: Option<String>,
    /// Indicates whether there are more eval runs available.
    pub has_more: bool,
}

/// Request to create an eval run.
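///
/// # Example
///
/// A minimal sketch using the generated builder with a JSONL file source; the
/// `async_openai::types::evals` import path and the file id are assumptions
/// here.
///
/// ```no_run
/// use async_openai::error::OpenAIError;
/// // Path assumed for illustration; adjust to the crate's actual re-exports.
/// use async_openai::types::evals::{
///     CreateEvalJsonlRunDataSource, CreateEvalRunDataSource, CreateEvalRunRequest,
///     CreateEvalRunRequestArgs, EvalJsonlFileIdSource, EvalJsonlSource,
/// };
///
/// fn build_run_request() -> Result<CreateEvalRunRequest, OpenAIError> {
///     // `data_source` has no usable default, so it must always be set explicitly.
///     CreateEvalRunRequestArgs::default()
///         .name("nightly regression run")
///         .data_source(CreateEvalRunDataSource::Jsonl(CreateEvalJsonlRunDataSource {
///             source: EvalJsonlSource::FileId(EvalJsonlFileIdSource {
///                 id: "file-abc123".to_string(),
///             }),
///         }))
///         .build()
/// }
/// ```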
#[derive(Debug, Serialize, Clone, Builder, PartialEq, Default)]
#[builder(name = "CreateEvalRunRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateEvalRunRequest {
    /// The name of the run.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// Details about the run's data source.
    pub data_source: CreateEvalRunDataSource,
    /// Metadata attached to the run.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
}

/// Details about the run's data source.
#[derive(Debug, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CreateEvalRunDataSource {
    /// JSONL data source.
    Jsonl(CreateEvalJsonlRunDataSource),
    /// Completions data source.
    Completions(CreateEvalCompletionsRunDataSource),
    /// Responses data source.
    Responses(CreateEvalResponsesRunDataSource),
}

// Manual Default implementation for Builder compatibility.
// Note: there is no meaningful default data source, so this panics if it is
// ever evaluated; `data_source` must always be set explicitly.
impl Default for CreateEvalRunDataSource {
    fn default() -> Self {
        todo!()
    }
}

/// Response from deleting an eval run.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct DeleteEvalRunResponse {
    /// The object type, which is always "eval.run.deleted".
    pub object: String,
    /// Whether the eval run was deleted.
    pub deleted: bool,
    /// The ID of the deleted eval run.
    pub run_id: String,
}

// EvalRunOutputItem types

/// A schema representing an evaluation run output item.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunOutputItem {
    /// The object type, which is always "eval.run.output_item".
    pub object: String,
    /// Unique identifier for the evaluation run output item.
    pub id: String,
    /// The identifier of the evaluation run associated with this output item.
    pub run_id: String,
    /// The identifier of the evaluation group.
    pub eval_id: String,
    /// Unix timestamp (in seconds) when the evaluation run was created.
    pub created_at: u64,
    /// The status of the evaluation run.
    pub status: String,
    /// The identifier for the data source item.
    pub datasource_item_id: u64,
    /// Details of the input data source item.
    pub datasource_item: serde_json::Value,
    /// A list of grader results for this output item.
    pub results: Vec<EvalRunOutputItemResult>,
    /// A sample containing the input and output of the evaluation run.
    pub sample: EvalRunOutputItemSample,
}

/// A single grader result for an evaluation run output item.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunOutputItemResult {
    /// The name of the grader.
    pub name: String,
    /// The numeric score produced by the grader.
    pub score: f64,
    /// Whether the grader considered the output a pass.
    pub passed: bool,
    /// Optional sample or intermediate data produced by the grader.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sample: Option<serde_json::Value>,
}

/// A simple output message with a plain-text role and content.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct SimpleOutputMessage {
    /// The role of the message.
    pub role: String,
    /// The content of the message.
    pub content: String,
}

/// A sample containing the input and output of the evaluation run.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunOutputItemSample {
    /// An array of input messages.
    pub input: Vec<SimpleInputMessage>,
    /// An array of output messages.
    pub output: Vec<SimpleOutputMessage>,
    /// The reason why the sample generation was finished.
    pub finish_reason: String,
    /// The model used for generating the sample.
    pub model: String,
    /// Token usage details for the sample.
    pub usage: EvalRunOutputItemUsage,
    /// Error information, if any.
    pub error: Option<EvalApiError>,
    /// The sampling temperature used.
    pub temperature: f64,
    /// The maximum number of tokens allowed for completion.
    pub max_completion_tokens: i32,
    /// The top_p value used for sampling.
    pub top_p: f64,
    /// The seed used for generating the sample.
    pub seed: i32,
}

/// Token usage details for the sample.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunOutputItemUsage {
    /// The total number of tokens used.
    pub total_tokens: i32,
    /// The number of completion tokens generated.
    pub completion_tokens: i32,
    /// The number of prompt tokens used.
    pub prompt_tokens: i32,
    /// The number of tokens retrieved from cache.
    pub cached_tokens: i32,
}

/// List of eval run output items.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalRunOutputItemList {
    /// The object type, which is always "list".
    pub object: String,
    /// An array of eval run output item objects.
    pub data: Vec<EvalRunOutputItem>,
    /// The identifier of the first eval run output item in the data array.
    pub first_id: Option<String>,
    /// The identifier of the last eval run output item in the data array.
    pub last_id: Option<String>,
    /// Indicates whether there are more eval run output items available.
    pub has_more: bool,
}

/// An object representing an error response from the Eval API.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EvalApiError {
    /// The error code.
    pub code: String,
    /// The error message.
    pub message: String,
}