Skip to main content

async_openai/types/images/
image.rs

1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3
4use crate::error::OpenAIError;
5use crate::types::images::ImageInput;
6
7#[derive(Default, Debug, Serialize, Deserialize, Clone, PartialEq)]
8pub enum ImageSize {
9    #[default]
10    #[serde(rename = "auto")]
11    Auto,
12    #[serde(rename = "256x256")]
13    S256x256,
14    #[serde(rename = "512x512")]
15    S512x512,
16    #[serde(rename = "1024x1024")]
17    S1024x1024,
18    #[serde(rename = "1792x1024")]
19    S1792x1024,
20    #[serde(rename = "1024x1792")]
21    S1024x1792,
22    #[serde(rename = "1536x1024")]
23    S1536x1024,
24    #[serde(rename = "1024x1536")]
25    S1024x1536,
26    #[serde(untagged)]
27    Other(String),
28}
29
30#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
31pub enum DallE2ImageSize {
32    #[serde(rename = "256x256")]
33    S256x256,
34    #[serde(rename = "512x512")]
35    S512x512,
36    #[default]
37    #[serde(rename = "1024x1024")]
38    S1024x1024,
39}
40
41#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
42pub enum DallE3ImageSize {
43    #[default]
44    #[serde(rename = "1024x1024")]
45    S1024x1024,
46    #[serde(rename = "1792x1024")]
47    S1792x1024,
48    #[serde(rename = "1024x1792")]
49    S1024x1792,
50}
51
52#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
53pub enum GptImage1ImageSize {
54    #[default]
55    #[serde(rename = "auto")]
56    Auto,
57    #[serde(rename = "1024x1024")]
58    S1024x1024,
59    #[serde(rename = "1536x1024")]
60    S1536x1024,
61    #[serde(rename = "1024x1536")]
62    S1024x1536,
63}
64
65#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)]
66#[serde(rename_all = "lowercase")]
67pub enum ImageResponseFormat {
68    #[default]
69    Url,
70    #[serde(rename = "b64_json")]
71    B64Json,
72}
73
74#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
75pub enum ImageModel {
76    #[serde(rename = "gpt-image-2")]
77    GptImage2,
78    #[serde(rename = "gpt-image-1")]
79    GptImage1,
80    #[serde(rename = "gpt-image-1.5")]
81    GptImage1dot5,
82    #[serde(rename = "gpt-image-1-mini")]
83    GptImage1Mini,
84    #[default]
85    #[serde(rename = "dall-e-2")]
86    DallE2,
87    #[serde(rename = "dall-e-3")]
88    DallE3,
89    #[serde(untagged)]
90    Other(String),
91}
92
93#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
94#[serde(rename_all = "lowercase")]
95pub enum ImageQuality {
96    Standard,
97    HD,
98    High,
99    Medium,
100    Low,
101    #[default]
102    Auto,
103}
104
105#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
106#[serde(rename_all = "lowercase")]
107pub enum ImageStyle {
108    #[default]
109    Vivid,
110    Natural,
111}
112
113#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
114#[serde(rename_all = "lowercase")]
115pub enum ImageModeration {
116    #[default]
117    Auto,
118    Low,
119}
120
121#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
122#[serde(rename_all = "lowercase")]
123pub enum ImageOutputFormat {
124    #[default]
125    Png,
126    Jpeg,
127    Webp,
128}
129
130#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
131#[serde(rename_all = "lowercase")]
132pub enum ImageBackground {
133    #[default]
134    Auto,
135    Transparent,
136    Opaque,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize, Default, Builder, PartialEq)]
140#[builder(name = "CreateImageRequestArgs")]
141#[builder(pattern = "mutable")]
142#[builder(setter(into, strip_option), default)]
143#[builder(derive(Debug))]
144#[builder(build_fn(error = "OpenAIError"))]
145pub struct CreateImageRequest {
146    /// A text description of the desired image(s). The maximum length is 32000 characters for
147    /// the GPT image models, 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`.
148    pub prompt: String,
149
150    /// The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or the GPT image model
151    /// (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`). Defaults
152    /// to `dall-e-2` unless a parameter specific to the GPT image models is used.
153    #[serde(skip_serializing_if = "Option::is_none")]
154    pub model: Option<ImageModel>,
155
156    /// The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
157    #[serde(skip_serializing_if = "Option::is_none")]
158    pub n: Option<u8>, // min:1 max:10 default:1
159
160    /// The quality of the image that will be generated.
161    ///
162    /// - `auto` (default value) will automatically select the best quality for the given model.
163    /// - `high`, `medium` and `low` are supported for the GPT image models.
164    /// - `hd` and `standard` are supported for `dall-e-3`.
165    /// - `standard` is the only option for `dall-e-2`.
166    #[serde(skip_serializing_if = "Option::is_none")]
167    pub quality: Option<ImageQuality>,
168
169    /// The format in which generated images with `dall-e-2` and `dall-e-3` are returned. Must be one of
170    /// `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. This
171    /// parameter isn't supported for the GPT image models which will always return base64-encoded images.
172    #[serde(skip_serializing_if = "Option::is_none")]
173    pub response_format: Option<ImageResponseFormat>,
174
175    /// The format in which the generated images are returned. This parameter is only supported for
176    /// the GPT image models. Must be one of `png`, `jpeg`, or `webp`.
177    #[serde(skip_serializing_if = "Option::is_none")]
178    pub output_format: Option<ImageOutputFormat>,
179
180    /// The compression level (0-100%) for the generated images. This parameter is only supported for
181    /// the GPT image models with the `webp` or `jpeg` output formats, and defaults to 100.
182    #[serde(skip_serializing_if = "Option::is_none")]
183    pub output_compression: Option<u8>,
184
185    /// Generate the image in streaming mode. Defaults to `false`. See the
186    /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
187    /// information. This parameter is only supported for the GPT image models.
188    #[serde(skip_serializing_if = "Option::is_none")]
189    pub stream: Option<bool>,
190
191    /// The number of partial images to generate. This parameter is used for
192    /// streaming responses that return partial images. Value must be between 0 and 3.
193    /// When set to 0, the response will be a single image sent in one streaming event.
194    /// Note that the final image may be sent before the full number of partial images
195    /// are generated if the full image is generated more quickly.
196    #[serde(skip_serializing_if = "Option::is_none")]
197    pub partial_images: Option<u8>,
198
199    /// The size of the generated images. For `gpt-image-2` and
200    /// `gpt-image-2-2026-04-21`, arbitrary resolutions are supported as
201    /// `WIDTHxHEIGHT` strings, for example `1536x864`. Width and height
202    /// must both be divisible by 16 and the requested aspect ratio must be
203    /// between 1:3 and 3:1. Resolutions above `2560x1440` are experimental,
204    /// and the maximum supported resolution is `3840x2160`. The requested
205    /// size must also satisfy the model's current pixel and edge limits.
206    /// The standard sizes `1024x1024`, `1536x1024`, and `1024x1536` are
207    /// supported by the GPT image models; `auto` is supported for models
208    /// that allow automatic sizing. For `dall-e-2`, use one of `256x256`,
209    /// `512x512`, or `1024x1024`. For `dall-e-3`, use one of `1024x1024`,
210    /// `1792x1024`, or `1024x1792`.
211    #[serde(skip_serializing_if = "Option::is_none")]
212    pub size: Option<ImageSize>,
213
214    /// Control the content-moderation level for images generated by the GPT image models. Must be either `low`
215    /// for less restrictive filtering or `auto` (default value).
216    #[serde(skip_serializing_if = "Option::is_none")]
217    pub moderation: Option<ImageModeration>,
218
219    /// Allows to set transparency for the background of the generated image(s).
220    /// This parameter is only supported for the GPT image models. Must be one of
221    /// `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
222    /// model will automatically determine the best background for the image.
223    /// If `transparent`, the output format needs to support transparency, so it
224    /// should be set to either `png` (default value) or `webp`.
225    #[serde(skip_serializing_if = "Option::is_none")]
226    pub background: Option<ImageBackground>,
227
228    /// The style of the generated images. This parameter is only supported for `dall-e-3`. Must be one of
229    ///`vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic
230    /// images. Natural causes the model to produce more natural, less hyper-real looking images.
231    #[serde(skip_serializing_if = "Option::is_none")]
232    pub style: Option<ImageStyle>,
233
234    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
235    ///[Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
236    #[serde(skip_serializing_if = "Option::is_none")]
237    pub user: Option<String>,
238}
239
240#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
241#[serde(untagged)]
242pub enum Image {
243    /// The URL of the generated image, if `response_format` is `url` (default).
244    Url {
245        url: String,
246        revised_prompt: Option<String>,
247    },
248    /// The base64-encoded JSON of the generated image, if `response_format` is `b64_json`.
249    B64Json {
250        b64_json: std::sync::Arc<String>,
251        revised_prompt: Option<String>,
252    },
253}
254
255#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
256#[serde(rename_all = "lowercase")]
257pub enum ImageResponseBackground {
258    Transparent,
259    Opaque,
260}
261
262#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
263pub struct ImageGenInputUsageDetails {
264    /// The number of text tokens in the input prompt.
265    pub text_tokens: u32,
266    /// The number of image tokens in the input prompt.
267    pub image_tokens: u32,
268}
269
270#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
271pub struct ImageGenOutputTokensDetails {
272    /// The number of text output tokens generated by the model.
273    pub text_tokens: u32,
274    /// The number of image output tokens generated by the model.
275    pub image_tokens: u32,
276}
277
278#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
279pub struct ImageGenUsage {
280    /// The number of tokens (images and text) in the input prompt.
281    pub input_tokens: u32,
282    /// The total number of tokens (images and text) used for the image generation.
283    pub total_tokens: u32,
284    /// The number of output tokens generated by the model.
285    pub output_tokens: u32,
286    /// The output token details for the image generation.
287    pub output_token_details: Option<ImageGenOutputTokensDetails>,
288    /// The input tokens detailed information for the image generation.
289    pub input_tokens_details: ImageGenInputUsageDetails,
290}
291
292#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
293pub struct ImagesResponse {
294    /// The Unix timestamp (in seconds) of when the image was created.
295    pub created: u32,
296    /// The list of generated images.
297    pub data: Vec<std::sync::Arc<Image>>,
298    /// The background parameter used for the image generation. Either `transparent` or `opaque`.
299    pub background: Option<ImageResponseBackground>,
300    /// The output format of the image generation. Either `png`, `webp`, or `jpeg`.
301    pub output_format: Option<ImageOutputFormat>,
302    /// The size of the generated image. Either `1024x1024`, `1536x1024`, `1024x1536`.
303    pub size: Option<ImageSize>,
304    /// The quality of the image generated. Either `low`, `medium`, or `high`.
305    pub quality: Option<ImageQuality>,
306    /// For the GPT image models only, the token usage information for the image generation.
307    pub usage: Option<ImageGenUsage>,
308}
309
310#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
311#[serde(rename_all = "lowercase")]
312pub enum InputFidelity {
313    High,
314    #[default]
315    Low,
316}
317
318#[derive(Debug, Clone, PartialEq)]
319pub enum ImageEditInput {
320    Image(ImageInput),
321    Images(Vec<ImageInput>),
322}
323
324#[derive(Debug, Clone, Default, Builder, PartialEq)]
325#[builder(name = "CreateImageEditRequestArgs")]
326#[builder(pattern = "mutable")]
327#[builder(setter(into, strip_option), default)]
328#[builder(derive(Debug))]
329#[builder(build_fn(error = "OpenAIError"))]
330pub struct CreateImageEditRequest {
331    /// The image(s) to edit. Must be a supported image file or an array of images.
332    ///
333    /// For the GPT image models (the GPT image models, `gpt-image-1-mini`, and `gpt-image-1.5`), each image
334    /// should be a `png`, `webp`, or `jpg` file less
335    /// than 50MB. You can provide up to 16 images.
336    ///
337    /// For `dall-e-2`, you can only provide one image, and it should be a square
338    /// `png` file less than 4MB.
339    pub image: ImageEditInput,
340
341    /// A text description of the desired image(s). The maximum length is 1000 characters
342    /// for `dall-e-2`, and 32000 characters for the GPT image models.
343    pub prompt: String,
344
345    /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where
346    /// `image` should be edited. If there are multiple images provided, the mask will be applied on the
347    /// first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`.
348    pub mask: Option<ImageInput>,
349
350    /// Allows to set transparency for the background of the generated image(s).
351    /// This parameter is only supported for the GPT image models. Must be one of
352    /// `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
353    /// model will automatically determine the best background for the image.
354    ///
355    /// If `transparent`, the output format needs to support transparency, so it
356    /// should be set to either `png` (default value) or `webp`.
357    pub background: Option<ImageBackground>,
358
359    /// The model to use for image generation. Only `dall-e-2` and the GPT image models are supported.
360    /// Defaults to `dall-e-2` unless a parameter specific to the GPT image models is used.
361    pub model: Option<ImageModel>,
362
363    /// The number of images to generate. Must be between 1 and 10.
364    pub n: Option<u8>, // min:1 max:10 default:1
365
366    /// The size of the generated images. For `gpt-image-2` and
367    /// `gpt-image-2-2026-04-21`, arbitrary resolutions are supported as
368    /// `WIDTHxHEIGHT` strings, for example `1536x864`. Width and height
369    /// must both be divisible by 16 and the requested aspect ratio must be
370    /// between 1:3 and 3:1. Resolutions above `2560x1440` are experimental,
371    /// and the maximum supported resolution is `3840x2160`. The requested
372    /// size must also satisfy the model's current pixel and edge limits.
373    /// The standard sizes `1024x1024`, `1536x1024`, and `1024x1536` are
374    /// supported by the GPT image models; `auto` is supported for models
375    /// that allow automatic sizing. For `dall-e-2`, use one of `256x256`,
376    /// `512x512`, or `1024x1024`. For `dall-e-3`, use one of `1024x1024`,
377    /// `1792x1024`, or `1024x1792`.
378    pub size: Option<ImageSize>,
379
380    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
381    /// are only valid for 60 minutes after the image has been generated. This parameter is only supported
382    /// for `dall-e-2`, as the GPT image models will always return base64-encoded images.
383    pub response_format: Option<ImageResponseFormat>,
384
385    /// The format in which the generated images are returned. This parameter is
386    /// only supported for the GPT image models. Must be one of `png`, `jpeg`, or `webp`.
387    /// The default value is `png`.
388    pub output_format: Option<ImageOutputFormat>,
389
390    /// The compression level (0-100%) for the generated images. This parameter
391    /// is only supported for the GPT image models with the `webp` or `jpeg` output
392    /// formats, and defaults to 100.
393    pub output_compression: Option<u8>,
394
395    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
396    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
397    pub user: Option<String>,
398
399    /// Control how much effort the model will exert to match the style and features, especially facial
400    /// features, of input images. This parameter is only supported for the GPT image models. Unsupported for
401    /// `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
402    pub input_fidelity: Option<InputFidelity>,
403
404    /// Edit the image in streaming mode. Defaults to `false`. See the
405    /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
406    /// information.
407    pub stream: Option<bool>,
408
409    /// The number of partial images to generate. This parameter is used for
410    /// streaming responses that return partial images. Value must be between 0 and 3.
411    /// When set to 0, the response will be a single image sent in one streaming event.
412
413    /// Note that the final image may be sent before the full number of partial images
414    /// are generated if the full image is generated more quickly.
415    pub partial_images: Option<u8>,
416
417    /// The quality of the image that will be generated. `high`, `medium` and `low` are only supported for
418    /// the GPT image models. `dall-e-2` only supports `standard` quality. Defaults to `auto`.
419    pub quality: Option<ImageQuality>,
420}
421
422#[derive(Debug, Default, Clone, Builder, PartialEq)]
423#[builder(name = "CreateImageVariationRequestArgs")]
424#[builder(pattern = "mutable")]
425#[builder(setter(into, strip_option), default)]
426#[builder(derive(Debug))]
427#[builder(build_fn(error = "OpenAIError"))]
428pub struct CreateImageVariationRequest {
429    /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and
430    /// square.
431    pub image: ImageInput,
432
433    /// The model to use for image generation. Only `dall-e-2` is supported at this time.
434    pub model: Option<ImageModel>,
435
436    /// The number of images to generate. Must be between 1 and 10.
437    pub n: Option<u8>, // min:1 max:10 default:1
438
439    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
440    /// are only valid for 60 minutes after the image has been generated.
441    pub response_format: Option<ImageResponseFormat>,
442
443    /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.
444    pub size: Option<DallE2ImageSize>,
445
446    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
447    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
448    pub user: Option<String>,
449}
450
451/// Reference an input image by either URL or uploaded file ID.
452/// Provide exactly one of `image_url` or `file_id`.
453#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
454pub struct ImageRefParam {
455    /// A fully qualified URL or base64-encoded data URL.
456    #[serde(skip_serializing_if = "Option::is_none")]
457    pub image_url: Option<String>,
458    /// The File API ID of an uploaded image to use as input.
459    #[serde(skip_serializing_if = "Option::is_none")]
460    pub file_id: Option<String>,
461}
462
463/// JSON request body for image edits.
464///
465/// Use `images` (array of `ImageRefParam`) instead of multipart `image` uploads.
466/// You can reference images via external URLs, data URLs, or uploaded file IDs.
467/// JSON edits support GPT image models only; DALL-E edits require multipart (`dall-e-2` only).
468#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Builder, Default)]
469#[builder(name = "EditImageJsonRequestArgs")]
470#[builder(pattern = "mutable")]
471#[builder(setter(into, strip_option), default)]
472#[builder(derive(Debug))]
473#[builder(build_fn(error = "OpenAIError"))]
474pub struct EditImageJsonRequest {
475    /// The model to use for image editing.
476    #[serde(skip_serializing_if = "Option::is_none")]
477    pub model: Option<ImageModel>,
478
479    /// Input image references to edit. For GPT image models, you can provide up to 16 images.
480    pub images: Vec<ImageRefParam>,
481
482    /// An optional mask image reference indicating which areas of the image should be edited.
483    #[serde(skip_serializing_if = "Option::is_none")]
484    pub mask: Option<ImageRefParam>,
485
486    /// A text description of the desired image edit.
487    pub prompt: String,
488
489    /// The number of edited images to generate. Must be between 1 and 10.
490    #[serde(skip_serializing_if = "Option::is_none")]
491    pub n: Option<u8>,
492
493    /// The quality of the image that will be generated.
494    #[serde(skip_serializing_if = "Option::is_none")]
495    pub quality: Option<ImageQuality>,
496
497    /// Control how much effort the model will exert to match the style and features,
498    /// especially facial features, of input images. Supports `high` and `low`. Defaults to `low`.
499    #[serde(skip_serializing_if = "Option::is_none")]
500    pub input_fidelity: Option<InputFidelity>,
501
502    /// The size of the generated image.
503    #[serde(skip_serializing_if = "Option::is_none")]
504    pub size: Option<ImageSize>,
505
506    /// A unique identifier representing your end-user.
507    #[serde(skip_serializing_if = "Option::is_none")]
508    pub user: Option<String>,
509
510    /// The output format for the generated image.
511    #[serde(skip_serializing_if = "Option::is_none")]
512    pub output_format: Option<ImageOutputFormat>,
513
514    /// The compression level (0-100%) for the generated images.
515    #[serde(skip_serializing_if = "Option::is_none")]
516    pub output_compression: Option<u8>,
517
518    /// Control the content-moderation level for images.
519    #[serde(skip_serializing_if = "Option::is_none")]
520    pub moderation: Option<ImageModeration>,
521
522    /// The background style for the generated image.
523    #[serde(skip_serializing_if = "Option::is_none")]
524    pub background: Option<ImageBackground>,
525
526    /// Whether to stream the image generation. Defaults to `false`.
527    #[serde(skip_serializing_if = "Option::is_none")]
528    pub stream: Option<bool>,
529
530    /// The number of partial images to generate during streaming.
531    #[serde(skip_serializing_if = "Option::is_none")]
532    pub partial_images: Option<u8>,
533}