async_openai/types/images/image.rs
1use derive_builder::Builder;
2use serde::{Deserialize, Serialize};
3
4use crate::error::OpenAIError;
5use crate::types::images::ImageInput;
6
7#[derive(Default, Debug, Serialize, Deserialize, Clone, PartialEq)]
8pub enum ImageSize {
9 #[default]
10 #[serde(rename = "auto")]
11 Auto,
12 #[serde(rename = "256x256")]
13 S256x256,
14 #[serde(rename = "512x512")]
15 S512x512,
16 #[serde(rename = "1024x1024")]
17 S1024x1024,
18 #[serde(rename = "1792x1024")]
19 S1792x1024,
20 #[serde(rename = "1024x1792")]
21 S1024x1792,
22 #[serde(rename = "1536x1024")]
23 S1536x1024,
24 #[serde(rename = "1024x1536")]
25 S1024x1536,
26 #[serde(untagged)]
27 Other(String),
28}
29
30#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
31pub enum DallE2ImageSize {
32 #[serde(rename = "256x256")]
33 S256x256,
34 #[serde(rename = "512x512")]
35 S512x512,
36 #[default]
37 #[serde(rename = "1024x1024")]
38 S1024x1024,
39}
40
41#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
42pub enum DallE3ImageSize {
43 #[default]
44 #[serde(rename = "1024x1024")]
45 S1024x1024,
46 #[serde(rename = "1792x1024")]
47 S1792x1024,
48 #[serde(rename = "1024x1792")]
49 S1024x1792,
50}
51
52#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
53pub enum GptImage1ImageSize {
54 #[default]
55 #[serde(rename = "auto")]
56 Auto,
57 #[serde(rename = "1024x1024")]
58 S1024x1024,
59 #[serde(rename = "1536x1024")]
60 S1536x1024,
61 #[serde(rename = "1024x1536")]
62 S1024x1536,
63}
64
65#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)]
66#[serde(rename_all = "lowercase")]
67pub enum ImageResponseFormat {
68 #[default]
69 Url,
70 #[serde(rename = "b64_json")]
71 B64Json,
72}
73
74#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
75pub enum ImageModel {
76 #[serde(rename = "gpt-image-2")]
77 GptImage2,
78 #[serde(rename = "gpt-image-1")]
79 GptImage1,
80 #[serde(rename = "gpt-image-1.5")]
81 GptImage1dot5,
82 #[serde(rename = "gpt-image-1-mini")]
83 GptImage1Mini,
84 #[default]
85 #[serde(rename = "dall-e-2")]
86 DallE2,
87 #[serde(rename = "dall-e-3")]
88 DallE3,
89 #[serde(untagged)]
90 Other(String),
91}
92
93#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
94#[serde(rename_all = "lowercase")]
95pub enum ImageQuality {
96 Standard,
97 HD,
98 High,
99 Medium,
100 Low,
101 #[default]
102 Auto,
103}
104
105#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
106#[serde(rename_all = "lowercase")]
107pub enum ImageStyle {
108 #[default]
109 Vivid,
110 Natural,
111}
112
113#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
114#[serde(rename_all = "lowercase")]
115pub enum ImageModeration {
116 #[default]
117 Auto,
118 Low,
119}
120
121#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
122#[serde(rename_all = "lowercase")]
123pub enum ImageOutputFormat {
124 #[default]
125 Png,
126 Jpeg,
127 Webp,
128}
129
130#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
131#[serde(rename_all = "lowercase")]
132pub enum ImageBackground {
133 #[default]
134 Auto,
135 Transparent,
136 Opaque,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize, Default, Builder, PartialEq)]
140#[builder(name = "CreateImageRequestArgs")]
141#[builder(pattern = "mutable")]
142#[builder(setter(into, strip_option), default)]
143#[builder(derive(Debug))]
144#[builder(build_fn(error = "OpenAIError"))]
145pub struct CreateImageRequest {
146 /// A text description of the desired image(s). The maximum length is 32000 characters for
147 /// the GPT image models, 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`.
148 pub prompt: String,
149
150 /// The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or the GPT image model
151 /// (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`). Defaults
152 /// to `dall-e-2` unless a parameter specific to the GPT image models is used.
153 #[serde(skip_serializing_if = "Option::is_none")]
154 pub model: Option<ImageModel>,
155
156 /// The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
157 #[serde(skip_serializing_if = "Option::is_none")]
158 pub n: Option<u8>, // min:1 max:10 default:1
159
160 /// The quality of the image that will be generated.
161 ///
162 /// - `auto` (default value) will automatically select the best quality for the given model.
163 /// - `high`, `medium` and `low` are supported for the GPT image models.
164 /// - `hd` and `standard` are supported for `dall-e-3`.
165 /// - `standard` is the only option for `dall-e-2`.
166 #[serde(skip_serializing_if = "Option::is_none")]
167 pub quality: Option<ImageQuality>,
168
169 /// The format in which generated images with `dall-e-2` and `dall-e-3` are returned. Must be one of
170 /// `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. This
171 /// parameter isn't supported for the GPT image models which will always return base64-encoded images.
172 #[serde(skip_serializing_if = "Option::is_none")]
173 pub response_format: Option<ImageResponseFormat>,
174
175 /// The format in which the generated images are returned. This parameter is only supported for
176 /// the GPT image models. Must be one of `png`, `jpeg`, or `webp`.
177 #[serde(skip_serializing_if = "Option::is_none")]
178 pub output_format: Option<ImageOutputFormat>,
179
180 /// The compression level (0-100%) for the generated images. This parameter is only supported for
181 /// the GPT image models with the `webp` or `jpeg` output formats, and defaults to 100.
182 #[serde(skip_serializing_if = "Option::is_none")]
183 pub output_compression: Option<u8>,
184
185 /// Generate the image in streaming mode. Defaults to `false`. See the
186 /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
187 /// information. This parameter is only supported for the GPT image models.
188 #[serde(skip_serializing_if = "Option::is_none")]
189 pub stream: Option<bool>,
190
191 /// The number of partial images to generate. This parameter is used for
192 /// streaming responses that return partial images. Value must be between 0 and 3.
193 /// When set to 0, the response will be a single image sent in one streaming event.
194 /// Note that the final image may be sent before the full number of partial images
195 /// are generated if the full image is generated more quickly.
196 #[serde(skip_serializing_if = "Option::is_none")]
197 pub partial_images: Option<u8>,
198
199 /// The size of the generated images. For `gpt-image-2` and
200 /// `gpt-image-2-2026-04-21`, arbitrary resolutions are supported as
201 /// `WIDTHxHEIGHT` strings, for example `1536x864`. Width and height
202 /// must both be divisible by 16 and the requested aspect ratio must be
203 /// between 1:3 and 3:1. Resolutions above `2560x1440` are experimental,
204 /// and the maximum supported resolution is `3840x2160`. The requested
205 /// size must also satisfy the model's current pixel and edge limits.
206 /// The standard sizes `1024x1024`, `1536x1024`, and `1024x1536` are
207 /// supported by the GPT image models; `auto` is supported for models
208 /// that allow automatic sizing. For `dall-e-2`, use one of `256x256`,
209 /// `512x512`, or `1024x1024`. For `dall-e-3`, use one of `1024x1024`,
210 /// `1792x1024`, or `1024x1792`.
211 #[serde(skip_serializing_if = "Option::is_none")]
212 pub size: Option<ImageSize>,
213
214 /// Control the content-moderation level for images generated by the GPT image models. Must be either `low`
215 /// for less restrictive filtering or `auto` (default value).
216 #[serde(skip_serializing_if = "Option::is_none")]
217 pub moderation: Option<ImageModeration>,
218
219 /// Allows to set transparency for the background of the generated image(s).
220 /// This parameter is only supported for the GPT image models. Must be one of
221 /// `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
222 /// model will automatically determine the best background for the image.
223 /// If `transparent`, the output format needs to support transparency, so it
224 /// should be set to either `png` (default value) or `webp`.
225 #[serde(skip_serializing_if = "Option::is_none")]
226 pub background: Option<ImageBackground>,
227
228 /// The style of the generated images. This parameter is only supported for `dall-e-3`. Must be one of
229 ///`vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic
230 /// images. Natural causes the model to produce more natural, less hyper-real looking images.
231 #[serde(skip_serializing_if = "Option::is_none")]
232 pub style: Option<ImageStyle>,
233
234 /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
235 ///[Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
236 #[serde(skip_serializing_if = "Option::is_none")]
237 pub user: Option<String>,
238}
239
240#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
241#[serde(untagged)]
242pub enum Image {
243 /// The URL of the generated image, if `response_format` is `url` (default).
244 Url {
245 url: String,
246 revised_prompt: Option<String>,
247 },
248 /// The base64-encoded JSON of the generated image, if `response_format` is `b64_json`.
249 B64Json {
250 b64_json: std::sync::Arc<String>,
251 revised_prompt: Option<String>,
252 },
253}
254
255#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
256#[serde(rename_all = "lowercase")]
257pub enum ImageResponseBackground {
258 Transparent,
259 Opaque,
260}
261
262#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
263pub struct ImageGenInputUsageDetails {
264 /// The number of text tokens in the input prompt.
265 pub text_tokens: u32,
266 /// The number of image tokens in the input prompt.
267 pub image_tokens: u32,
268}
269
270#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
271pub struct ImageGenOutputTokensDetails {
272 /// The number of text output tokens generated by the model.
273 pub text_tokens: u32,
274 /// The number of image output tokens generated by the model.
275 pub image_tokens: u32,
276}
277
278#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
279pub struct ImageGenUsage {
280 /// The number of tokens (images and text) in the input prompt.
281 pub input_tokens: u32,
282 /// The total number of tokens (images and text) used for the image generation.
283 pub total_tokens: u32,
284 /// The number of output tokens generated by the model.
285 pub output_tokens: u32,
286 /// The output token details for the image generation.
287 pub output_token_details: Option<ImageGenOutputTokensDetails>,
288 /// The input tokens detailed information for the image generation.
289 pub input_tokens_details: ImageGenInputUsageDetails,
290}
291
292#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
293pub struct ImagesResponse {
294 /// The Unix timestamp (in seconds) of when the image was created.
295 pub created: u32,
296 /// The list of generated images.
297 pub data: Vec<std::sync::Arc<Image>>,
298 /// The background parameter used for the image generation. Either `transparent` or `opaque`.
299 pub background: Option<ImageResponseBackground>,
300 /// The output format of the image generation. Either `png`, `webp`, or `jpeg`.
301 pub output_format: Option<ImageOutputFormat>,
302 /// The size of the generated image. Either `1024x1024`, `1536x1024`, `1024x1536`.
303 pub size: Option<ImageSize>,
304 /// The quality of the image generated. Either `low`, `medium`, or `high`.
305 pub quality: Option<ImageQuality>,
306 /// For the GPT image models only, the token usage information for the image generation.
307 pub usage: Option<ImageGenUsage>,
308}
309
310#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
311#[serde(rename_all = "lowercase")]
312pub enum InputFidelity {
313 High,
314 #[default]
315 Low,
316}
317
318#[derive(Debug, Clone, PartialEq)]
319pub enum ImageEditInput {
320 Image(ImageInput),
321 Images(Vec<ImageInput>),
322}
323
324#[derive(Debug, Clone, Default, Builder, PartialEq)]
325#[builder(name = "CreateImageEditRequestArgs")]
326#[builder(pattern = "mutable")]
327#[builder(setter(into, strip_option), default)]
328#[builder(derive(Debug))]
329#[builder(build_fn(error = "OpenAIError"))]
330pub struct CreateImageEditRequest {
331 /// The image(s) to edit. Must be a supported image file or an array of images.
332 ///
333 /// For the GPT image models (the GPT image models, `gpt-image-1-mini`, and `gpt-image-1.5`), each image
334 /// should be a `png`, `webp`, or `jpg` file less
335 /// than 50MB. You can provide up to 16 images.
336 ///
337 /// For `dall-e-2`, you can only provide one image, and it should be a square
338 /// `png` file less than 4MB.
339 pub image: ImageEditInput,
340
341 /// A text description of the desired image(s). The maximum length is 1000 characters
342 /// for `dall-e-2`, and 32000 characters for the GPT image models.
343 pub prompt: String,
344
345 /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where
346 /// `image` should be edited. If there are multiple images provided, the mask will be applied on the
347 /// first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`.
348 pub mask: Option<ImageInput>,
349
350 /// Allows to set transparency for the background of the generated image(s).
351 /// This parameter is only supported for the GPT image models. Must be one of
352 /// `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
353 /// model will automatically determine the best background for the image.
354 ///
355 /// If `transparent`, the output format needs to support transparency, so it
356 /// should be set to either `png` (default value) or `webp`.
357 pub background: Option<ImageBackground>,
358
359 /// The model to use for image generation. Only `dall-e-2` and the GPT image models are supported.
360 /// Defaults to `dall-e-2` unless a parameter specific to the GPT image models is used.
361 pub model: Option<ImageModel>,
362
363 /// The number of images to generate. Must be between 1 and 10.
364 pub n: Option<u8>, // min:1 max:10 default:1
365
366 /// The size of the generated images. For `gpt-image-2` and
367 /// `gpt-image-2-2026-04-21`, arbitrary resolutions are supported as
368 /// `WIDTHxHEIGHT` strings, for example `1536x864`. Width and height
369 /// must both be divisible by 16 and the requested aspect ratio must be
370 /// between 1:3 and 3:1. Resolutions above `2560x1440` are experimental,
371 /// and the maximum supported resolution is `3840x2160`. The requested
372 /// size must also satisfy the model's current pixel and edge limits.
373 /// The standard sizes `1024x1024`, `1536x1024`, and `1024x1536` are
374 /// supported by the GPT image models; `auto` is supported for models
375 /// that allow automatic sizing. For `dall-e-2`, use one of `256x256`,
376 /// `512x512`, or `1024x1024`. For `dall-e-3`, use one of `1024x1024`,
377 /// `1792x1024`, or `1024x1792`.
378 pub size: Option<ImageSize>,
379
380 /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
381 /// are only valid for 60 minutes after the image has been generated. This parameter is only supported
382 /// for `dall-e-2`, as the GPT image models will always return base64-encoded images.
383 pub response_format: Option<ImageResponseFormat>,
384
385 /// The format in which the generated images are returned. This parameter is
386 /// only supported for the GPT image models. Must be one of `png`, `jpeg`, or `webp`.
387 /// The default value is `png`.
388 pub output_format: Option<ImageOutputFormat>,
389
390 /// The compression level (0-100%) for the generated images. This parameter
391 /// is only supported for the GPT image models with the `webp` or `jpeg` output
392 /// formats, and defaults to 100.
393 pub output_compression: Option<u8>,
394
395 /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
396 /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
397 pub user: Option<String>,
398
399 /// Control how much effort the model will exert to match the style and features, especially facial
400 /// features, of input images. This parameter is only supported for the GPT image models. Unsupported for
401 /// `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
402 pub input_fidelity: Option<InputFidelity>,
403
404 /// Edit the image in streaming mode. Defaults to `false`. See the
405 /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
406 /// information.
407 pub stream: Option<bool>,
408
409 /// The number of partial images to generate. This parameter is used for
410 /// streaming responses that return partial images. Value must be between 0 and 3.
411 /// When set to 0, the response will be a single image sent in one streaming event.
412
413 /// Note that the final image may be sent before the full number of partial images
414 /// are generated if the full image is generated more quickly.
415 pub partial_images: Option<u8>,
416
417 /// The quality of the image that will be generated. `high`, `medium` and `low` are only supported for
418 /// the GPT image models. `dall-e-2` only supports `standard` quality. Defaults to `auto`.
419 pub quality: Option<ImageQuality>,
420}
421
422#[derive(Debug, Default, Clone, Builder, PartialEq)]
423#[builder(name = "CreateImageVariationRequestArgs")]
424#[builder(pattern = "mutable")]
425#[builder(setter(into, strip_option), default)]
426#[builder(derive(Debug))]
427#[builder(build_fn(error = "OpenAIError"))]
428pub struct CreateImageVariationRequest {
429 /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and
430 /// square.
431 pub image: ImageInput,
432
433 /// The model to use for image generation. Only `dall-e-2` is supported at this time.
434 pub model: Option<ImageModel>,
435
436 /// The number of images to generate. Must be between 1 and 10.
437 pub n: Option<u8>, // min:1 max:10 default:1
438
439 /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
440 /// are only valid for 60 minutes after the image has been generated.
441 pub response_format: Option<ImageResponseFormat>,
442
443 /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.
444 pub size: Option<DallE2ImageSize>,
445
446 /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
447 /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
448 pub user: Option<String>,
449}
450
451/// Reference an input image by either URL or uploaded file ID.
452/// Provide exactly one of `image_url` or `file_id`.
453#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
454pub struct ImageRefParam {
455 /// A fully qualified URL or base64-encoded data URL.
456 #[serde(skip_serializing_if = "Option::is_none")]
457 pub image_url: Option<String>,
458 /// The File API ID of an uploaded image to use as input.
459 #[serde(skip_serializing_if = "Option::is_none")]
460 pub file_id: Option<String>,
461}
462
463/// JSON request body for image edits.
464///
465/// Use `images` (array of `ImageRefParam`) instead of multipart `image` uploads.
466/// You can reference images via external URLs, data URLs, or uploaded file IDs.
467/// JSON edits support GPT image models only; DALL-E edits require multipart (`dall-e-2` only).
468#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Builder, Default)]
469#[builder(name = "EditImageJsonRequestArgs")]
470#[builder(pattern = "mutable")]
471#[builder(setter(into, strip_option), default)]
472#[builder(derive(Debug))]
473#[builder(build_fn(error = "OpenAIError"))]
474pub struct EditImageJsonRequest {
475 /// The model to use for image editing.
476 #[serde(skip_serializing_if = "Option::is_none")]
477 pub model: Option<ImageModel>,
478
479 /// Input image references to edit. For GPT image models, you can provide up to 16 images.
480 pub images: Vec<ImageRefParam>,
481
482 /// An optional mask image reference indicating which areas of the image should be edited.
483 #[serde(skip_serializing_if = "Option::is_none")]
484 pub mask: Option<ImageRefParam>,
485
486 /// A text description of the desired image edit.
487 pub prompt: String,
488
489 /// The number of edited images to generate. Must be between 1 and 10.
490 #[serde(skip_serializing_if = "Option::is_none")]
491 pub n: Option<u8>,
492
493 /// The quality of the image that will be generated.
494 #[serde(skip_serializing_if = "Option::is_none")]
495 pub quality: Option<ImageQuality>,
496
497 /// Control how much effort the model will exert to match the style and features,
498 /// especially facial features, of input images. Supports `high` and `low`. Defaults to `low`.
499 #[serde(skip_serializing_if = "Option::is_none")]
500 pub input_fidelity: Option<InputFidelity>,
501
502 /// The size of the generated image.
503 #[serde(skip_serializing_if = "Option::is_none")]
504 pub size: Option<ImageSize>,
505
506 /// A unique identifier representing your end-user.
507 #[serde(skip_serializing_if = "Option::is_none")]
508 pub user: Option<String>,
509
510 /// The output format for the generated image.
511 #[serde(skip_serializing_if = "Option::is_none")]
512 pub output_format: Option<ImageOutputFormat>,
513
514 /// The compression level (0-100%) for the generated images.
515 #[serde(skip_serializing_if = "Option::is_none")]
516 pub output_compression: Option<u8>,
517
518 /// Control the content-moderation level for images.
519 #[serde(skip_serializing_if = "Option::is_none")]
520 pub moderation: Option<ImageModeration>,
521
522 /// The background style for the generated image.
523 #[serde(skip_serializing_if = "Option::is_none")]
524 pub background: Option<ImageBackground>,
525
526 /// Whether to stream the image generation. Defaults to `false`.
527 #[serde(skip_serializing_if = "Option::is_none")]
528 pub stream: Option<bool>,
529
530 /// The number of partial images to generate during streaming.
531 #[serde(skip_serializing_if = "Option::is_none")]
532 pub partial_images: Option<u8>,
533}