async_openai/types/embeddings/embedding.rs
1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3
4use crate::error::OpenAIError;
5
6#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)]
7#[serde(untagged)]
8pub enum EmbeddingInput {
9 String(String),
10 StringArray(Vec<String>),
11 // Minimum value is 0, maximum value is 100257 (inclusive).
12 IntegerArray(Vec<u32>),
13 ArrayOfIntegerArray(Vec<Vec<u32>>),
14}
15
16#[derive(Debug, Serialize, Default, Clone, PartialEq, Deserialize)]
17#[serde(rename_all = "lowercase")]
18pub enum EncodingFormat {
19 #[default]
20 Float,
21 Base64,
22}
23
24#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
25#[builder(name = "CreateEmbeddingRequestArgs")]
26#[builder(pattern = "mutable")]
27#[builder(setter(into, strip_option), default)]
28#[builder(derive(Debug))]
29#[builder(build_fn(error = "OpenAIError"))]
30pub struct CreateEmbeddingRequest {
31 /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list)
32 /// API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models)
33 /// for descriptions of them.
34 pub model: String,
35
36 /// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single
37 /// request, pass an array of strings or array of token arrays. The input must not exceed the max
38 /// input tokens for the model (8192 tokens for all embedding models), cannot be an empty string, and
39 /// any array must be 2048 dimensions or less. [Example Python
40 /// code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.
41 /// In addition to the per-input token limit, all embedding models enforce a maximum of 300,000
42 /// tokens summed across all inputs in a single request.
43 pub input: EmbeddingInput,
44
45 /// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
46 #[serde(skip_serializing_if = "Option::is_none")]
47 pub encoding_format: Option<EncodingFormat>,
48
49 /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
50 /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
51 #[serde(skip_serializing_if = "Option::is_none")]
52 pub user: Option<String>,
53
54 /// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.
55 #[serde(skip_serializing_if = "Option::is_none")]
56 pub dimensions: Option<u32>,
57}
58
59/// Represents an embedding vector returned by embedding endpoint.
60#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
61pub struct Embedding {
62 /// The index of the embedding in the list of embeddings.
63 pub index: u32,
64 /// The object type, which is always "embedding".
65 pub object: String,
66 /// The embedding vector, which is a list of floats. The length of vector
67 /// depends on the model as listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings).
68 pub embedding: Vec<f32>,
69}
70
71#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
72pub struct Base64EmbeddingVector(pub String);
73
74/// Represents an base64-encoded embedding vector returned by embedding endpoint.
75#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
76pub struct Base64Embedding {
77 /// The index of the embedding in the list of embeddings.
78 pub index: u32,
79 /// The object type, which is always "embedding".
80 pub object: String,
81 /// The embedding vector, encoded in base64.
82 pub embedding: Base64EmbeddingVector,
83}
84
85#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
86pub struct EmbeddingUsage {
87 /// The number of tokens used by the prompt.
88 pub prompt_tokens: u32,
89 /// The total number of tokens used by the request.
90 pub total_tokens: u32,
91}
92
93#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
94pub struct CreateEmbeddingResponse {
95 pub object: String,
96 /// The name of the model used to generate the embedding.
97 pub model: String,
98 /// The list of embeddings generated by the model.
99 pub data: Vec<Embedding>,
100 /// The usage information for the request.
101 pub usage: EmbeddingUsage,
102}
103
104#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
105pub struct CreateBase64EmbeddingResponse {
106 pub object: String,
107 /// The name of the model used to generate the embedding.
108 pub model: String,
109 /// The list of embeddings generated by the model.
110 pub data: Vec<Base64Embedding>,
111 /// The usage information for the request.
112 pub usage: EmbeddingUsage,
113}