async_openai/types/images/image.rs
use derive_builder::Builder;
use serde::{Deserialize, Serialize};

use crate::error::OpenAIError;
use crate::types::images::ImageInput;

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum ImageSize {
    #[default]
    #[serde(rename = "auto")]
    Auto,
    #[serde(rename = "256x256")]
    S256x256,
    #[serde(rename = "512x512")]
    S512x512,
    #[serde(rename = "1024x1024")]
    S1024x1024,
    #[serde(rename = "1792x1024")]
    S1792x1024,
    #[serde(rename = "1024x1792")]
    S1024x1792,
    #[serde(rename = "1536x1024")]
    S1536x1024,
    #[serde(rename = "1024x1536")]
    S1024x1536,
}

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum DallE2ImageSize {
    #[serde(rename = "256x256")]
    S256x256,
    #[serde(rename = "512x512")]
    S512x512,
    #[default]
    #[serde(rename = "1024x1024")]
    S1024x1024,
}

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum DallE3ImageSize {
    #[default]
    #[serde(rename = "1024x1024")]
    S1024x1024,
    #[serde(rename = "1792x1024")]
    S1792x1024,
    #[serde(rename = "1024x1792")]
    S1024x1792,
}

#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum GptImage1ImageSize {
    #[default]
    #[serde(rename = "auto")]
    Auto,
    #[serde(rename = "1024x1024")]
    S1024x1024,
    #[serde(rename = "1536x1024")]
    S1536x1024,
    #[serde(rename = "1024x1536")]
    S1024x1536,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageResponseFormat {
    #[default]
    Url,
    #[serde(rename = "b64_json")]
    B64Json,
}

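/// The image model to target. Known models get dedicated variants; the
/// `#[serde(untagged)]` `Other` variant passes any other model id through
/// unchanged.
///
/// A minimal sketch of the round-trip behavior (the import path is assumed
/// from this module's location in the crate):
///
/// ```no_run
/// use async_openai::types::images::ImageModel;
///
/// let known: ImageModel = serde_json::from_str(r#""dall-e-3""#).unwrap();
/// assert_eq!(known, ImageModel::DallE3);
///
/// // Ids that match no rename fall through to the untagged variant.
/// let custom: ImageModel = serde_json::from_str(r#""my-custom-image-model""#).unwrap();
/// assert_eq!(custom, ImageModel::Other("my-custom-image-model".to_string()));
/// ```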
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
pub enum ImageModel {
    #[serde(rename = "gpt-image-1")]
    GptImage1,
    #[serde(rename = "gpt-image-1.5")]
    GptImage1dot5,
    #[serde(rename = "gpt-image-1-mini")]
    GptImage1Mini,
    #[default]
    #[serde(rename = "dall-e-2")]
    DallE2,
    #[serde(rename = "dall-e-3")]
    DallE3,
    #[serde(untagged)]
    Other(String),
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageQuality {
    Standard,
    HD,
    High,
    Medium,
    Low,
    #[default]
    Auto,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageStyle {
    #[default]
    Vivid,
    Natural,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageModeration {
    #[default]
    Auto,
    Low,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageOutputFormat {
    #[default]
    Png,
    Jpeg,
    Webp,
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageBackground {
    #[default]
    Auto,
    Transparent,
    Opaque,
}

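/// Request for image generation (`POST /v1/images/generations`).
///
/// A minimal sketch of building a request through the generated
/// `CreateImageRequestArgs` builder (the import path is assumed from this
/// module's location in the crate):
///
/// ```no_run
/// use async_openai::error::OpenAIError;
/// use async_openai::types::images::{CreateImageRequestArgs, ImageModel, ImageSize};
///
/// fn build() -> Result<(), OpenAIError> {
///     let _request = CreateImageRequestArgs::default()
///         .prompt("A cute baby sea otter wearing a beret")
///         .model(ImageModel::DallE3)
///         .n(1u8)
///         .size(ImageSize::S1024x1024)
///         .build()?; // `build` returns `Result<CreateImageRequest, OpenAIError>`
///     Ok(())
/// }
/// ```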
#[derive(Debug, Clone, Serialize, Deserialize, Default, Builder, PartialEq)]
#[builder(name = "CreateImageRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateImageRequest {
    /// A text description of the desired image(s). The maximum length is 32000 characters for
    /// the GPT image models, 1000 characters for `dall-e-2`, and 4000 characters for `dall-e-3`.
    pub prompt: String,

    /// The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or one of the GPT
    /// image models (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`). Defaults
    /// to `dall-e-2` unless a parameter specific to the GPT image models is used.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<ImageModel>,

    /// The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u8>, // min:1 max:10 default:1

    /// The quality of the image that will be generated.
    ///
    /// - `auto` (default value) will automatically select the best quality for the given model.
    /// - `high`, `medium`, and `low` are supported for the GPT image models.
    /// - `hd` and `standard` are supported for `dall-e-3`.
    /// - `standard` is the only option for `dall-e-2`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub quality: Option<ImageQuality>,

    /// The format in which images generated with `dall-e-2` and `dall-e-3` are returned. Must be one of
    /// `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. This
    /// parameter isn't supported for the GPT image models, which always return base64-encoded images.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ImageResponseFormat>,

    /// The format in which the generated images are returned. This parameter is only supported for
    /// the GPT image models. Must be one of `png`, `jpeg`, or `webp`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_format: Option<ImageOutputFormat>,

    /// The compression level (0-100%) for the generated images. This parameter is only supported for
    /// the GPT image models with the `webp` or `jpeg` output formats, and defaults to 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_compression: Option<u8>,

    /// Generate the image in streaming mode. Defaults to `false`. See the
    /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
    /// information. This parameter is only supported for the GPT image models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// The number of partial images to generate. This parameter is used for
    /// streaming responses that return partial images. Value must be between 0 and 3.
    /// When set to 0, the response will be a single image sent in one streaming event.
    /// Note that the final image may be sent before the full number of partial images
    /// are generated if the full image is generated more quickly.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub partial_images: Option<u8>,

    /// The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536`
    /// (portrait), or `auto` (default value) for the GPT image models, one of `256x256`, `512x512`, or
    /// `1024x1024` for `dall-e-2`, and one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<ImageSize>,

    /// Control the content-moderation level for images generated by the GPT image models. Must be either `low`
    /// for less restrictive filtering or `auto` (default value).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub moderation: Option<ImageModeration>,

    /// Allows setting transparency for the background of the generated image(s).
    /// This parameter is only supported for the GPT image models. Must be one of
    /// `transparent`, `opaque`, or `auto` (default value). When `auto` is used, the
    /// model will automatically determine the best background for the image.
    /// If `transparent`, the output format needs to support transparency, so it
    /// should be set to either `png` (default value) or `webp`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub background: Option<ImageBackground>,

    /// The style of the generated images. This parameter is only supported for `dall-e-3`. Must be one of
    /// `vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic
    /// images. Natural causes the model to produce more natural, less hyper-real looking images.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub style: Option<ImageStyle>,

    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,
}

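/// A single generated image. Deserializes `#[serde(untagged)]`: a JSON object
/// carrying a `url` field maps to [`Image::Url`], one carrying `b64_json` maps
/// to [`Image::B64Json`].
///
/// A minimal sketch of consuming either variant (the import path is assumed
/// from this module's location in the crate):
///
/// ```no_run
/// use async_openai::types::images::Image;
///
/// fn describe(image: &Image) -> String {
///     match image {
///         Image::Url { url, .. } => format!("hosted at {url}"),
///         Image::B64Json { b64_json, .. } => format!("{} base64 characters", b64_json.len()),
///     }
/// }
/// ```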
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum Image {
    /// The URL of the generated image, if `response_format` is `url` (default).
    Url {
        url: String,
        revised_prompt: Option<String>,
    },
    /// The base64-encoded JSON of the generated image, if `response_format` is `b64_json`.
    B64Json {
        b64_json: std::sync::Arc<String>,
        revised_prompt: Option<String>,
    },
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ImageResponseBackground {
    Transparent,
    Opaque,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ImageGenInputUsageDetails {
    /// The number of text tokens in the input prompt.
    pub text_tokens: u32,
    /// The number of image tokens in the input prompt.
    pub image_tokens: u32,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ImageGenOutputTokensDetails {
    /// The number of text output tokens generated by the model.
    pub text_tokens: u32,
    /// The number of image output tokens generated by the model.
    pub image_tokens: u32,
}

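/// Token usage for a GPT image model call. As with other OpenAI usage objects,
/// `total_tokens` is expected to equal `input_tokens` plus `output_tokens`;
/// that relationship is an assumption about the API, not something this crate
/// enforces. A minimal sketch:
///
/// ```no_run
/// use async_openai::types::images::ImageGenUsage;
///
/// fn check(usage: &ImageGenUsage) {
///     // Assumed invariant of the upstream usage object, not validated here.
///     debug_assert_eq!(usage.input_tokens + usage.output_tokens, usage.total_tokens);
/// }
/// ```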
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ImageGenUsage {
    /// The number of tokens (images and text) in the input prompt.
    pub input_tokens: u32,
    /// The total number of tokens (images and text) used for the image generation.
    pub total_tokens: u32,
    /// The number of output tokens generated by the model.
    pub output_tokens: u32,
    /// The output token details for the image generation.
    pub output_token_details: ImageGenOutputTokensDetails,
    /// The input token details for the image generation.
    pub input_tokens_details: ImageGenInputUsageDetails,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ImagesResponse {
    /// The Unix timestamp (in seconds) of when the image was created.
    pub created: u32,
    /// The list of generated images.
    pub data: Vec<std::sync::Arc<Image>>,
    /// The background parameter used for the image generation. Either `transparent` or `opaque`.
    pub background: Option<ImageResponseBackground>,
    /// The output format of the image generation. One of `png`, `webp`, or `jpeg`.
    pub output_format: Option<ImageOutputFormat>,
    /// The size of the generated image. One of `1024x1024`, `1536x1024`, or `1024x1536`.
    pub size: Option<ImageSize>,
    /// The quality of the image generated. One of `low`, `medium`, or `high`.
    pub quality: Option<ImageQuality>,
    /// For the GPT image models only, the token usage information for the image generation.
    pub usage: Option<ImageGenUsage>,
}

296
297#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
298#[serde(rename_all = "lowercase")]
299pub enum InputFidelity {
300 High,
301 #[default]
302 Low,
303}
304
305#[derive(Debug, Clone, PartialEq)]
306pub enum ImageEditInput {
307 Image(ImageInput),
308 Images(Vec<ImageInput>),
309}
310
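/// Request for image edits (`POST /v1/images/edits`).
///
/// A minimal sketch of building an edit request with the generated
/// `CreateImageEditRequestArgs` builder. Constructing the `ImageInput` itself
/// (e.g. from a file path or an in-memory buffer) is left to the caller, since
/// that type is defined elsewhere in this crate; the import path is assumed
/// from this module's location.
///
/// ```no_run
/// use async_openai::error::OpenAIError;
/// use async_openai::types::images::{
///     CreateImageEditRequest, CreateImageEditRequestArgs, ImageEditInput, ImageInput, ImageModel,
/// };
///
/// fn build(image: ImageInput) -> Result<CreateImageEditRequest, OpenAIError> {
///     CreateImageEditRequestArgs::default()
///         .image(ImageEditInput::Image(image))
///         .prompt("Add a red wool hat to the subject")
///         .model(ImageModel::GptImage1)
///         .n(1u8)
///         .build()
/// }
/// ```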
#[derive(Debug, Clone, Default, Builder, PartialEq)]
#[builder(name = "CreateImageEditRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateImageEditRequest {
    /// The image(s) to edit. Must be a supported image file or an array of images.
    ///
    /// For the GPT image models (`gpt-image-1`, `gpt-image-1-mini`, and `gpt-image-1.5`), each image
    /// should be a `png`, `webp`, or `jpg` file less than 50MB. You can provide up to 16 images.
    ///
    /// For `dall-e-2`, you can only provide one image, and it should be a square
    /// `png` file less than 4MB.
    pub image: ImageEditInput,

    /// A text description of the desired image(s). The maximum length is 1000 characters
    /// for `dall-e-2`, and 32000 characters for the GPT image models.
    pub prompt: String,

    /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where
    /// `image` should be edited. If multiple images are provided, the mask will be applied to the
    /// first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`.
    pub mask: Option<ImageInput>,

    /// Allows setting transparency for the background of the generated image(s).
    /// This parameter is only supported for the GPT image models. Must be one of
    /// `transparent`, `opaque`, or `auto` (default value). When `auto` is used, the
    /// model will automatically determine the best background for the image.
    ///
    /// If `transparent`, the output format needs to support transparency, so it
    /// should be set to either `png` (default value) or `webp`.
    pub background: Option<ImageBackground>,

    /// The model to use for image generation. Only `dall-e-2` and the GPT image models are supported.
    /// Defaults to `dall-e-2` unless a parameter specific to the GPT image models is used.
    pub model: Option<ImageModel>,

    /// The number of images to generate. Must be between 1 and 10.
    pub n: Option<u8>, // min:1 max:10 default:1

    /// The size of the generated images. Must be one of `1024x1024`, `1536x1024` (landscape),
    /// `1024x1536` (portrait), or `auto` (default value) for the GPT image models, and one of `256x256`,
    /// `512x512`, or `1024x1024` for `dall-e-2`.
    pub size: Option<ImageSize>,

    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
    /// are only valid for 60 minutes after the image has been generated. This parameter is only supported
    /// for `dall-e-2`, as the GPT image models always return base64-encoded images.
    pub response_format: Option<ImageResponseFormat>,

    /// The format in which the generated images are returned. This parameter is
    /// only supported for the GPT image models. Must be one of `png`, `jpeg`, or `webp`.
    /// The default value is `png`.
    pub output_format: Option<ImageOutputFormat>,

    /// The compression level (0-100%) for the generated images. This parameter
    /// is only supported for the GPT image models with the `webp` or `jpeg` output
    /// formats, and defaults to 100.
    pub output_compression: Option<u8>,

    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
    pub user: Option<String>,

    /// Control how much effort the model will exert to match the style and features, especially facial
    /// features, of input images. This parameter is only supported for the GPT image models, except
    /// `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
    pub input_fidelity: Option<InputFidelity>,

    /// Edit the image in streaming mode. Defaults to `false`. See the
    /// [Image generation guide](https://platform.openai.com/docs/guides/image-generation) for more
    /// information.
    pub stream: Option<bool>,

    /// The number of partial images to generate. This parameter is used for
    /// streaming responses that return partial images. Value must be between 0 and 3.
    /// When set to 0, the response will be a single image sent in one streaming event.
    /// Note that the final image may be sent before the full number of partial images
    /// are generated if the full image is generated more quickly.
    pub partial_images: Option<u8>,

    /// The quality of the image that will be generated. `high`, `medium`, and `low` are only supported for
    /// the GPT image models. `dall-e-2` only supports `standard` quality. Defaults to `auto`.
    pub quality: Option<ImageQuality>,
}

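/// Request for image variations (`POST /v1/images/variations`).
///
/// A minimal sketch of building a variation request with the generated
/// `CreateImageVariationRequestArgs` builder; as above, obtaining the
/// `ImageInput` is left to the caller, and the import path is assumed from
/// this module's location in the crate.
///
/// ```no_run
/// use async_openai::error::OpenAIError;
/// use async_openai::types::images::{
///     CreateImageVariationRequest, CreateImageVariationRequestArgs, DallE2ImageSize, ImageInput,
/// };
///
/// fn build(image: ImageInput) -> Result<CreateImageVariationRequest, OpenAIError> {
///     CreateImageVariationRequestArgs::default()
///         .image(image)
///         .n(2u8)
///         .size(DallE2ImageSize::S512x512)
///         .build()
/// }
/// ```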
#[derive(Debug, Default, Clone, Builder, PartialEq)]
#[builder(name = "CreateImageVariationRequestArgs")]
#[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)]
#[builder(derive(Debug))]
#[builder(build_fn(error = "OpenAIError"))]
pub struct CreateImageVariationRequest {
    /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and
    /// square.
    pub image: ImageInput,

    /// The model to use for image generation. Only `dall-e-2` is supported at this time.
    pub model: Option<ImageModel>,

    /// The number of images to generate. Must be between 1 and 10.
    pub n: Option<u8>, // min:1 max:10 default:1

    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs
    /// are only valid for 60 minutes after the image has been generated.
    pub response_format: Option<ImageResponseFormat>,

    /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.
    pub size: Option<DallE2ImageSize>,

    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
    pub user: Option<String>,
}