// async_openai/types/images/image.rs

use derive_builder::Builder;
use serde::{Deserialize, Serialize};

use crate::error::OpenAIError;
use crate::types::InputSource;

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum ImageSize {
    #[default]
    #[serde(rename = "auto")]
    Auto,
    #[serde(rename = "256x256")]
    S256x256,
    #[serde(rename = "512x512")]
    S512x512,
    #[serde(rename = "1024x1024")]
    S1024x1024,
    #[serde(rename = "1792x1024")]
    S1792x1024,
    #[serde(rename = "1024x1792")]
    S1024x1792,
    #[serde(rename = "1536x1024")]
    S1536x1024,
    #[serde(rename = "1024x1536")]
    S1024x1536,
}

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum DallE2ImageSize {
    #[serde(rename = "256x256")]
    S256x256,
    #[serde(rename = "512x512")]
    S512x512,
    #[default]
    #[serde(rename = "1024x1024")]
    S1024x1024,
}

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum DallE3ImageSize {
    #[default]
    #[serde(rename = "1024x1024")]
    S1024x1024,
    #[serde(rename = "1792x1024")]
    S1792x1024,
    #[serde(rename = "1024x1792")]
    S1024x1792,
}

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum GptImage1ImageSize {
    #[default]
    #[serde(rename = "auto")]
    Auto,
    #[serde(rename = "1024x1024")]
    S1024x1024,
    #[serde(rename = "1536x1024")]
    S1536x1024,
    #[serde(rename = "1024x1536")]
    S1024x1536,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageResponseFormat {
    #[default]
    Url,
    #[serde(rename = "b64_json")]
    B64Json,
}

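/// The model to use for image generation.
///
/// Known models serialize to their wire names via `serde(rename = ...)`; the
/// `Other` variant is marked `#[serde(untagged)]`, so any model string the
/// crate does not know about round-trips through it. A sketch of that
/// behavior (assuming these types are re-exported at `async_openai::types`
/// and `serde_json` is available, as elsewhere in this crate):
///
/// ```
/// # use async_openai::types::ImageModel;
/// // Known variants serialize to their renamed strings.
/// assert_eq!(
///     serde_json::to_string(&ImageModel::DallE3).unwrap(),
///     "\"dall-e-3\"",
/// );
/// // Unknown model names fall through to the untagged `Other` variant.
/// let custom: ImageModel = serde_json::from_str("\"my-custom-model\"").unwrap();
/// assert_eq!(custom, ImageModel::Other("my-custom-model".to_string()));
/// ```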
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
pub enum ImageModel {
    #[serde(rename = "gpt-image-1")]
    GptImage1,
    #[serde(rename = "gpt-image-1-mini")]
    GptImage1Mini,
    #[default]
    #[serde(rename = "dall-e-2")]
    DallE2,
    #[serde(rename = "dall-e-3")]
    DallE3,
    #[serde(untagged)]
    Other(String),
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageQuality {
    Standard,
    HD,
    High,
    Medium,
    Low,
    #[default]
    Auto,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageStyle {
    #[default]
    Vivid,
    Natural,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageModeration {
    #[default]
    Auto,
    Low,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageOutputFormat {
    #[default]
    Png,
    Jpeg,
    Webp,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageBackground {
    #[default]
    Auto,
    Transparent,
    Opaque,
}

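/// Request body for image generation.
///
/// A minimal construction sketch using the derived builder; it assumes these
/// types are re-exported at `async_openai::types`, as the rest of this crate
/// does. Because of `setter(into, strip_option)`, plain values can be passed
/// for the `Option` fields.
///
/// ```
/// # use async_openai::types::{CreateImageRequestArgs, ImageModel, ImageSize};
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let request = CreateImageRequestArgs::default()
///     .prompt("A watercolor lighthouse at dawn")
///     .model(ImageModel::GptImage1)
///     .size(ImageSize::S1024x1024)
///     .n(1u8)
///     .build()?;
/// assert_eq!(request.n, Some(1));
/// # Ok(())
/// # }
/// ```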
#[derive(Debug, Clone, Serialize, Deserialize, Default, Builder, PartialEq)]
#[builder(name = "CreateImageRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateImageRequest {
    /// A text description of the desired image(s). The maximum length is 32000 characters for
    /// `gpt-image-1`, 1000 characters for `dall-e-2`, and 4000 characters for `dall-e-3`.
    pub prompt: String,

    /// The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or `gpt-image-1`. Defaults
    /// to `dall-e-2` unless a parameter specific to `gpt-image-1` is used.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<ImageModel>,

    /// The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u8>, // min:1 max:10 default:1

    /// The quality of the image that will be generated.
    ///
    /// - `auto` (default value) will automatically select the best quality for the given model.
    /// - `high`, `medium`, and `low` are supported for `gpt-image-1`.
    /// - `hd` and `standard` are supported for `dall-e-3`.
    /// - `standard` is the only option for `dall-e-2`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub quality: Option<ImageQuality>,

    /// The format in which generated images with `dall-e-2` and `dall-e-3` are returned. Must be one of
    /// `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. This
    /// parameter isn't supported for `gpt-image-1`, which will always return base64-encoded images.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ImageResponseFormat>,

    /// The format in which the generated images are returned. This parameter is only supported for
    /// `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_format: Option<ImageOutputFormat>,

    /// The compression level (0-100%) for the generated images. This parameter is only supported for
    /// `gpt-image-1` with the `webp` or `jpeg` output formats, and defaults to 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_compression: Option<u8>,

    /// Generate the image in streaming mode. Defaults to `false`. See the
    /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
    /// information. This parameter is only supported for `gpt-image-1`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// The number of partial images to generate. This parameter is used for
    /// streaming responses that return partial images. Value must be between 0 and 3.
    /// When set to 0, the response will be a single image sent in one streaming event.
    /// Note that the final image may be sent before the full number of partial images
    /// has been generated if the full image is generated more quickly.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub partial_images: Option<u8>,

    /// The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536`
    /// (portrait), or `auto` (default value) for `gpt-image-1`; one of `256x256`, `512x512`, or
    /// `1024x1024` for `dall-e-2`; and one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<ImageSize>,

    /// Control the content-moderation level for images generated by `gpt-image-1`. Must be either `low`
    /// for less restrictive filtering or `auto` (default value).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub moderation: Option<ImageModeration>,

    /// Allows setting transparency for the background of the generated image(s).
    /// This parameter is only supported for `gpt-image-1`. Must be one of
    /// `transparent`, `opaque`, or `auto` (default value). When `auto` is used, the
    /// model will automatically determine the best background for the image.
    /// If `transparent`, the output format needs to support transparency, so it
    /// should be set to either `png` (default value) or `webp`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub background: Option<ImageBackground>,

    /// The style of the generated images. This parameter is only supported for `dall-e-3`. Must be one of
    /// `vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic
    /// images. Natural causes the model to produce more natural, less hyper-real looking images.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub style: Option<ImageStyle>,

    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,
}

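/// A single generated image, as returned by the API.
///
/// `#[serde(untagged)]` selects the variant from the fields present in the
/// JSON object. A deserialization sketch (same re-export and `serde_json`
/// assumptions as above; `revised_prompt` is an `Option`, so it may be
/// omitted from the payload):
///
/// ```
/// # use async_openai::types::Image;
/// let image: Image = serde_json::from_str(
///     r#"{"url": "https://example.com/generated.png"}"#,
/// ).unwrap();
/// assert!(matches!(image, Image::Url { .. }));
/// ```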
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum Image {
    /// The URL of the generated image, if `response_format` is `url` (default).
    Url {
        url: String,
        revised_prompt: Option<String>,
    },
    /// The base64-encoded JSON of the generated image, if `response_format` is `b64_json`.
    B64Json {
        b64_json: std::sync::Arc<String>,
        revised_prompt: Option<String>,
    },
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageResponseBackground {
    Transparent,
    Opaque,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ImageGenInputUsageDetails {
    /// The number of text tokens in the input prompt.
    pub text_tokens: u32,
    /// The number of image tokens in the input prompt.
    pub image_tokens: u32,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ImageGenUsage {
    /// The number of tokens (images and text) in the input prompt.
    pub input_tokens: u32,
    /// The total number of tokens (images and text) used for the image generation.
    pub total_tokens: u32,
    /// The number of output tokens generated by the model.
    pub output_tokens: u32,
    /// Detailed breakdown of the input tokens used for the image generation.
    pub input_tokens_details: ImageGenInputUsageDetails,
}

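/// Response returned by the image endpoints.
///
/// A sketch of consuming the response; the match arms mirror the two `Image`
/// variants defined above, and the helper function here is illustrative:
///
/// ```
/// # use async_openai::types::{Image, ImagesResponse};
/// # fn handle(response: &ImagesResponse) {
/// for image in &response.data {
///     match image.as_ref() {
///         Image::Url { url, .. } => println!("fetch from {url}"),
///         Image::B64Json { b64_json, .. } => {
///             println!("decode {} base64 chars", b64_json.len());
///         }
///     }
/// }
/// # }
/// ```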
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ImagesResponse {
    /// The Unix timestamp (in seconds) of when the image was created.
    pub created: u32,
    /// The list of generated images.
    pub data: Vec<std::sync::Arc<Image>>,
    /// The background parameter used for the image generation. Either `transparent` or `opaque`.
    pub background: Option<ImageResponseBackground>,
    /// The output format of the image generation. Either `png`, `webp`, or `jpeg`.
    pub output_format: Option<ImageOutputFormat>,
    /// The size of the generated image. Either `1024x1024`, `1536x1024`, or `1024x1536`.
    pub size: Option<ImageSize>,
    /// The quality of the image generated. Either `low`, `medium`, or `high`.
    pub quality: Option<ImageQuality>,
    /// For `gpt-image-1` only, the token usage information for the image generation.
    pub usage: Option<ImageGenUsage>,
}

#[derive(Debug, Default, Clone, PartialEq)]
pub struct ImageInput {
    pub source: InputSource,
}

#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum InputFidelity {
    High,
    #[default]
    Low,
}

#[derive(Debug, Clone, PartialEq)]
pub enum ImageEditInput {
    Image(ImageInput),
    Images(Vec<ImageInput>),
}

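/// Request body for the image edit endpoint. This request is uploaded as
/// multipart form data, which is why the struct has no `Serialize` derive.
///
/// A construction sketch with the derived builder. Beyond the re-export
/// assumption above, it assumes `InputSource` exposes a `Path` variant for
/// file-based inputs, as used elsewhere in this crate; for in-memory data,
/// use the byte-oriented variants instead.
///
/// ```no_run
/// # use async_openai::types::{
/// #     CreateImageEditRequestArgs, ImageEditInput, ImageInput, ImageModel, InputSource,
/// # };
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// // A single file-based input image; the path here is illustrative.
/// let image = ImageInput {
///     source: InputSource::Path { path: "./input.png".into() },
/// };
/// let request = CreateImageEditRequestArgs::default()
///     .image(ImageEditInput::Image(image))
///     .prompt("Replace the sky with a starry night")
///     .model(ImageModel::GptImage1)
///     .build()?;
/// # Ok(())
/// # }
/// ```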
#[derive(Debug, Clone, Default, Builder, PartialEq)]
#[builder(name = "CreateImageEditRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateImageEditRequest {
    /// The image(s) to edit. Must be a supported image file or an array of images.
    ///
    /// For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less
    /// than 50MB. You can provide up to 16 images.
    ///
    /// For `dall-e-2`, you can only provide one image, and it should be a square
    /// `png` file less than 4MB.
    pub image: ImageEditInput,

    /// A text description of the desired image(s). The maximum length is 1000 characters
    /// for `dall-e-2` and 32000 characters for `gpt-image-1`.
    pub prompt: String,

    /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where
    /// `image` should be edited. If multiple images are provided, the mask will be applied to the
    /// first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`.
    pub mask: Option<ImageInput>,

    /// Allows setting transparency for the background of the generated image(s).
    /// This parameter is only supported for `gpt-image-1`. Must be one of
    /// `transparent`, `opaque`, or `auto` (default value). When `auto` is used, the
    /// model will automatically determine the best background for the image.
    ///
    /// If `transparent`, the output format needs to support transparency, so it
    /// should be set to either `png` (default value) or `webp`.
    pub background: Option<ImageBackground>,

    /// The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are supported.
    /// Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1` is used.
    pub model: Option<ImageModel>,

    /// The number of images to generate. Must be between 1 and 10.
    pub n: Option<u8>, // min:1 max:10 default:1

    /// The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape),
    /// `1024x1536` (portrait), or `auto` (default value) for `gpt-image-1`, and one of `256x256`,
    /// `512x512`, or `1024x1024` for `dall-e-2`.
    pub size: Option<ImageSize>,

    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
    /// are only valid for 60 minutes after the image has been generated. This parameter is only supported
    /// for `dall-e-2`, as `gpt-image-1` will always return base64-encoded images.
    pub response_format: Option<ImageResponseFormat>,

    /// The format in which the generated images are returned. This parameter is
    /// only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.
    /// The default value is `png`.
    pub output_format: Option<ImageOutputFormat>,

    /// The compression level (0-100%) for the generated images. This parameter
    /// is only supported for `gpt-image-1` with the `webp` or `jpeg` output
    /// formats, and defaults to 100.
    pub output_compression: Option<u8>,

    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
    pub user: Option<String>,

    /// Control how much effort the model will exert to match the style and features, especially facial
    /// features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for
    /// `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
    pub input_fidelity: Option<InputFidelity>,

    /// Edit the image in streaming mode. Defaults to `false`. See the
    /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
    /// information.
    pub stream: Option<bool>,

    /// The number of partial images to generate. This parameter is used for
    /// streaming responses that return partial images. Value must be between 0 and 3.
    /// When set to 0, the response will be a single image sent in one streaming event.
    /// Note that the final image may be sent before the full number of partial images
    /// has been generated if the full image is generated more quickly.
    pub partial_images: Option<u8>,

    /// The quality of the image that will be generated. `high`, `medium`, and `low` are only supported for
    /// `gpt-image-1`. `dall-e-2` only supports `standard` quality. Defaults to `auto`.
    pub quality: Option<ImageQuality>,
}

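/// Request body for the image variation endpoint (`dall-e-2` only). Also
/// uploaded as multipart form data.
///
/// A construction sketch, under the same assumptions as the edit example
/// above (types re-exported at `async_openai::types`, `InputSource::Path`
/// for file inputs, illustrative file path):
///
/// ```no_run
/// # use async_openai::types::{
/// #     CreateImageVariationRequestArgs, DallE2ImageSize, ImageInput, InputSource,
/// # };
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let request = CreateImageVariationRequestArgs::default()
///     .image(ImageInput {
///         source: InputSource::Path { path: "./square.png".into() },
///     })
///     .n(2u8)
///     .size(DallE2ImageSize::S512x512)
///     .build()?;
/// # Ok(())
/// # }
/// ```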
#[derive(Debug, Default, Clone, Builder, PartialEq)]
#[builder(name = "CreateImageVariationRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateImageVariationRequest {
    /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and
    /// square.
    pub image: ImageInput,

    /// The model to use for image generation. Only `dall-e-2` is supported at this time.
    pub model: Option<ImageModel>,

    /// The number of images to generate. Must be between 1 and 10.
    pub n: Option<u8>, // min:1 max:10 default:1

    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
    /// are only valid for 60 minutes after the image has been generated.
    pub response_format: Option<ImageResponseFormat>,

    /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.
    pub size: Option<DallE2ImageSize>,

    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
    pub user: Option<String>,
}