stabilityai/types/spec_types.rs
1use std::{path::PathBuf, sync::Arc};
2
3use derive_builder::Builder;
4
5use serde::{Deserialize, Serialize};
6
7use crate::error::StabilityAIError;
8
9#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
10pub struct OrganizationMembership {
11 pub id: String,
12 pub is_default: bool,
13 pub name: String,
14 pub role: String,
15}
16
17#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
18pub struct AccountResponseBody {
19 /// The user's email
20 pub email: String,
21 /// The user's ID
22 pub id: String,
23 /// The user's organizations
24 pub organizations: Vec<OrganizationMembership>,
25 /// The user's profile picture
26 pub profile_picture: Option<String>,
27}
28
29#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
30pub struct BalanceResponseBody {
31 /// The balance of the account/organization associated with the API key
32 pub credits: f64,
33}
34
35#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
36pub enum EngineType {
37 AUDIO,
38 CLASSIFICATION,
39 PICTURE,
40 STORAGE,
41 TEXT,
42 VIDEO,
43}
44
45#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
46pub struct Engine {
47 /// Unique identifier for the engine
48 pub id: String,
49 /// Name of the engine
50 pub name: String,
51 pub description: String,
52 /// The type of content this engine produces
53 pub r#type: EngineType,
54}
55
56/// Text prompt for image generation
57#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
58pub struct TextPrompt {
59 /// The prompt itself
60 pub text: String,
61 /// Weight of the prompt (use negative numbers for negative prompts)
62 #[serde(skip_serializing_if = "Option::is_none")]
63 pub weight: Option<f64>,
64}
65
66#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
67pub enum ClipGuidancePreset {
68 #[serde(rename = "FAST_BLUE")]
69 FastBlue,
70 #[serde(rename = "FAST_GREEN")]
71 FastGreen,
72 #[serde(rename = "NONE")]
73 None,
74 #[serde(rename = "SIMPLE")]
75 Simple,
76 #[serde(rename = "SLOW")]
77 Slow,
78 #[serde(rename = "SLOWER")]
79 Slower,
80 #[serde(rename = "SLOWEST")]
81 Slowest,
82}
83
84#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
85pub enum Sampler {
86 #[serde(rename = "DDIM")]
87 Ddim,
88 #[serde(rename = "DDPM")]
89 Ddpm,
90 #[serde(rename = "K_DPMPP_2M")]
91 KDpmpp2m,
92 #[serde(rename = "K_DPMPP_2S_ANCESTRAL")]
93 KDpmpp2sAncestral,
94 #[serde(rename = "K_DPM_2")]
95 KDpm2,
96 #[serde(rename = "K_DPM_2_ANCESTRAL")]
97 KDpm2Ancestral,
98 #[serde(rename = "K_EULER")]
99 KEuler,
100 #[serde(rename = "K_EULER_ANCESTRAL")]
101 KEulerAncestral,
102 #[serde(rename = "K_HEUN")]
103 KHeun,
104 #[serde(rename = "K_LMS")]
105 KLms,
106}
107
108#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
109pub enum StylePreset {
110 #[serde(rename = "3d-model")]
111 ThreeDModel,
112 #[serde(rename = "analog-film")]
113 AnalogFilm,
114 #[serde(rename = "anime")]
115 Anime,
116 #[serde(rename = "cinematic")]
117 Cinematic,
118 #[serde(rename = "comic-book")]
119 ComicBook,
120 #[serde(rename = "digital-art")]
121 DigitalArt,
122 #[serde(rename = "enhance")]
123 Enhance,
124 #[serde(rename = "fantasy-art")]
125 FantasyArt,
126 #[serde(rename = "isometric")]
127 Isometric,
128 #[serde(rename = "line-art")]
129 LineArt,
130 #[serde(rename = "low-poly")]
131 LowPoly,
132 #[serde(rename = "modeling-compound")]
133 ModelingCompound,
134 #[serde(rename = "neon-punk")]
135 NeonPunk,
136 #[serde(rename = "origami")]
137 Origami,
138 #[serde(rename = "photographic")]
139 Photographic,
140 #[serde(rename = "pixel-art")]
141 PixelArt,
142 #[serde(rename = "tile-texture")]
143 TileTexture,
144}
145
146#[derive(Debug, Deserialize, Serialize, Default, Clone, PartialEq)]
147pub struct TextPrompts {
148 pub text_prompts: Vec<TextPrompt>,
149}
150
151#[derive(Debug, Serialize, Default, Clone, PartialEq, Builder)]
152#[builder(name = "TextToImageRequestBodyArgs")]
153#[builder(pattern = "mutable")]
154#[builder(setter(into, strip_option), default)]
155#[builder(derive(Debug))]
156#[builder(build_fn(error = "StabilityAIError"))]
157pub struct TextToImageRequestBody {
158 /// An array of text prompts to use for generation.
159 ///
160 ///
161 /// Due to how arrays are represented in `multipart/form-data` requests,
162 /// prompts must adhere to the format `text_prompts[index][text|weight]`,
163 ///
164 /// where `index` is some integer used to tie the text and weight together.
165 /// While `index` does not have to be sequential, duplicate entries
166 /// will override previous entries, so it is recommended to use sequential
167 /// indices.
168 ///
169 ///
170 /// Given a text prompt with the text `A lighthouse on a cliff` and a weight
171 /// of `0.5`, it would be represented as:
172 ///
173 /// `text_prompts[0][text]: "A lighthouse on a cliff"`
174 ///
175 /// `text_prompts[0][weight]: 0.5`
176 ///
177 ///
178 /// To add another prompt to that request simply provide the values
179 /// under a new `index`:
180 ///
181 ///
182 /// `text_prompts[0][text]: "A lighthouse on a cliff"`
183 ///
184 /// `text_prompts[0][weight]: 0.5`
185 ///
186 /// `text_prompts[1][text]: "land, ground, dirt, grass"`
187 ///
188 /// `text_prompts[1][weight]: -0.9`
189 #[serde(flatten)]
190 pub text_prompts: TextPrompts,
191
192 /// Height of the image in pixels. Must be in increments of 64 and pass the
193 /// following validation:
194 ///
195 /// - For 512 engines: 262,144 ≤ `height * width` ≤ 1,048,576
196 ///
197 /// - For 768 engines: 589,824 ≤ `height * width` ≤ 1,048,576
198 ///
199 /// - For SDXL Beta: can be as low as 128 and as high as 896 as long as `width`
200 /// is not greater than 512. If `width` is greater than 512 then this can
201 /// be _at most_ 512.
202 ///
203 /// - For SDXL v0.9: valid dimensions are 1024x1024, 1152x896, 1216x832,
204 /// 1344x768, 1536x640, 640x1536, 768x1344, 832x1216, or 896x1152
205 ///
206 /// - For SDXL v1.0: valid dimensions are the same as SDXL v0.9
207 #[serde(skip_serializing_if = "Option::is_none")]
208 pub height: Option<u16>,
209
210 /// Width of the image in pixels. Must be in increments of 64 and pass the
211 /// following validation:
212 ///
213 /// - For 512 engines: 262,144 ≤ `height * width` ≤ 1,048,576
214 ///
215 /// - For 768 engines: 589,824 ≤ `height * width` ≤ 1,048,576
216 ///
217 /// - For SDXL Beta: can be as low as 128 and as high as 896 as long as `height`
218 /// is not greater than 512. If `height` is greater than 512 then this can be _at most_ 512.
219 ///
220 /// - For SDXL v0.9: valid dimensions are 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640,
221 /// 640x1536, 768x1344, 832x1216, or 896x1152
222 ///
223 /// - For SDXL v1.0: valid dimensions are the same as SDXL v0.9
224 #[serde(skip_serializing_if = "Option::is_none")]
225 pub width: Option<u16>,
226
227 /// How strictly the diffusion process adheres to the prompt text
228 /// (higher values keep your image closer to your prompt)
229 #[serde(skip_serializing_if = "Option::is_none")]
230 pub cfg_scale: Option<u8>,
231
232 #[serde(skip_serializing_if = "Option::is_none")]
233 pub clip_guidance_preset: Option<ClipGuidancePreset>,
234
235 /// Which sampler to use for the diffusion process.
236 /// If this value is omitted we'll automatically select
237 /// an appropriate sampler for you.
238 #[serde(skip_serializing_if = "Option::is_none")]
239 pub sampler: Option<Sampler>,
240
241 /// Number of images to generate
242 #[serde(skip_serializing_if = "Option::is_none")]
243 pub samples: Option<u8>,
244
245 /// Random noise seed (omit this option or use `0` for a random seed)
246 #[serde(skip_serializing_if = "Option::is_none")]
247 pub seed: Option<u32>,
248
249 /// Number of diffusion steps to run
250 #[serde(skip_serializing_if = "Option::is_none")]
251 pub steps: Option<u32>,
252
253 /// Pass in a style preset to guide the image model towards a particular style.
254 ///
255 /// This list of style presets is subject to change.
256 #[serde(skip_serializing_if = "Option::is_none")]
257 pub style_preset: Option<StylePreset>,
258
259 /// Extra parameters passed to the engine.
260 ///
261 /// These parameters are used for in-development or experimental features
262 /// and may change without warning, so please use with caution.
263 #[serde(skip_serializing_if = "Option::is_none")]
264 pub extras: Option<serde_json::Value>,
265}
266
267#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
268pub enum FinishReason {
269 #[serde(rename = "CONTENT_FILTERED")]
270 ContentFiltered,
271 #[serde(rename = "ERROR")]
272 Error,
273 #[serde(rename = "SUCCESS")]
274 Success,
275}
276
277#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
278pub struct Artifacts {
279 pub artifacts: Vec<Arc<Image>>,
280}
281
282#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
283pub struct Image {
284 pub base64: String,
285 #[serde(rename = "finishReason")]
286 pub finish_reason: FinishReason,
287 pub seed: i64,
288}
289
290#[derive(Debug, Deserialize, Default, Serialize, Clone, PartialEq)]
291pub enum InitImageMode {
292 #[default]
293 #[serde(rename = "IMAGE_STRENGTH")]
294 ImageStrength,
295 #[serde(rename = "STEP_SCHEDULE")]
296 StepSchedule,
297}
298
/// Path wrapper for the `init_image` field of the image-to-image and masking
/// request bodies.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` because the only field,
/// a `PathBuf`, supports both; this lets the type be used in sets and as a
/// map key. The default value wraps an empty path.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct InitImage {
    /// Path of the image. This type only stores the path; reading or
    /// uploading the file happens elsewhere.
    pub path: PathBuf,
}
303
304#[derive(Debug, Default, Clone, PartialEq, Builder)]
305#[builder(name = "ImageToImageRequestBodyArgs")]
306#[builder(pattern = "mutable")]
307#[builder(setter(into, strip_option), default)]
308#[builder(derive(Debug))]
309#[builder(build_fn(error = "StabilityAIError"))]
310pub struct ImageToImageRequestBody {
311 pub text_prompts: TextPrompts,
312
313 pub init_image: InitImage,
314
315 /// Whether to use `image_strength` or `step_schedule_*` to control how
316 /// much influence the `init_image` has on the result.
317 pub init_image_mode: Option<InitImageMode>,
318
319 /// How much influence the `init_image` has on the diffusion process.
320 /// Values close to `1` will yield images very similar to the `init_image`
321 /// while values close to `0` will yield images wildly different than
322 /// the `init_image`. The behavior of this is meant to mirror DreamStudio's
323 /// \"Image Strength\" slider. <br/> <br/> This parameter is just an
324 /// alternate way to set `step_schedule_start`, which is done via the
325 /// calculation `1 - image_strength`. For example, passing in an Image
326 /// Strength of 35% (`0.35`) would result in a `step_schedule_start` of
327 /// `0.65`.\n"
328 pub image_strength: Option<f64>,
329
330 /// Skips a proportion of the start of the diffusion steps, allowing the
331 /// init_image to influence the final generated image. Lower values will
332 /// result in more influence from the init_image, while higher values will
333 /// result in more influence from the diffusion steps. (e.g. a value
334 /// of `0` would simply return you the init_image, where a value of `1`
335 /// would return you a completely different image.)
336 pub step_schedule_start: Option<f64>,
337
338 /// Skips a proportion of the end of the diffusion steps, allowing the
339 /// init_image to influence the final generated image. Lower values will
340 /// result in more influence from the init_image, while higher values will
341 /// result in more influence from the diffusion steps.
342 pub step_schedule_end: Option<f64>,
343
344 /// How strictly the diffusion process adheres to the prompt text
345 /// (higher values keep your image closer to your prompt)
346 pub cfg_scale: Option<u8>,
347
348 pub clip_guidance_preset: Option<ClipGuidancePreset>,
349
350 /// Which sampler to use for the diffusion process.
351 /// If this value is omitted we'll automatically select
352 /// an appropriate sampler for you.
353 pub sampler: Option<Sampler>,
354
355 /// Number of images to generate
356 pub samples: Option<u8>,
357
358 /// Random noise seed (omit this option or use `0` for a random seed)
359 pub seed: Option<u32>,
360
361 /// Number of diffusion steps to run
362 pub steps: Option<u32>,
363
364 /// Pass in a style preset to guide the image model towards a particular style.
365 ///
366 /// This list of style presets is subject to change.
367 pub style_preset: Option<StylePreset>,
368
369 /// Extra parameters passed to the engine.
370 ///
371 /// These parameters are used for in-development or experimental features
372 /// and may change without warning, so please use with caution.
373 pub extras: Option<serde_json::Value>,
374}
375
/// Path wrapper for the `image` field of the upscale request bodies.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` because the only field,
/// a `PathBuf`, supports both; this lets the type be used in sets and as a
/// map key. The default value wraps an empty path.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct InputImage {
    /// Path of the image. This type only stores the path; reading or
    /// uploading the file happens elsewhere.
    pub path: PathBuf,
}
380
381#[derive(Debug, Clone, PartialEq)]
382pub enum ImageToImageUpscaleBody {
383 LatentUpscalerUpscaleRequestBody(LatentUpscalerUpscaleRequestBody),
384 RealESRGANUpscaleRequestBody(RealESRGANUpscaleRequestBody),
385}
386
387#[derive(Debug, Default, Clone, PartialEq, Builder)]
388#[builder(name = "RealESRGANUpscaleRequestBodyArgs")]
389#[builder(pattern = "mutable")]
390#[builder(setter(into, strip_option), default)]
391#[builder(derive(Debug))]
392#[builder(build_fn(error = "StabilityAIError"))]
393pub struct RealESRGANUpscaleRequestBody {
394 pub image: InputImage,
395
396 /// Desired height of the output image.
397 /// Only one of `width` or `height` may be specified.
398 pub height: Option<u16>,
399
400 /// Desired width of the output image.
401 /// Only one of `width` or `height` may be specified.
402 pub width: Option<u16>,
403}
404
405#[derive(Debug, Default, Clone, PartialEq, Builder)]
406#[builder(name = "LatentUpscalerUpscaleRequestBodyArgs")]
407#[builder(pattern = "mutable")]
408#[builder(setter(into, strip_option), default)]
409#[builder(derive(Debug))]
410#[builder(build_fn(error = "StabilityAIError"))]
411pub struct LatentUpscalerUpscaleRequestBody {
412 pub image: InputImage,
413
414 pub text_prompts: Option<TextPrompts>,
415
416 /// Desired height of the output image.
417 /// Only one of `width` or `height` may be specified.
418 pub height: Option<u16>,
419
420 /// Desired width of the output image.
421 /// Only one of `width` or `height` may be specified.
422 pub width: Option<u16>,
423
424 /// How strictly the diffusion process adheres to the prompt text
425 /// (higher values keep your image closer to your prompt)
426 pub cfg_scale: Option<u8>,
427
428 /// Random noise seed (omit this option or use `0` for a random seed)
429 pub seed: Option<u32>,
430
431 /// Number of diffusion steps to run
432 pub steps: Option<u32>,
433}
434
435#[derive(Debug, Deserialize, Default, Serialize, Clone, PartialEq)]
436pub enum MaskSource {
437 #[default]
438 #[serde(rename = "MASK_IMAGE_BLACK")]
439 MaskImageBlack,
440 #[serde(rename = "MASK_IMAGE_WHITE")]
441 MaskImageWhite,
442 #[serde(rename = "INIT_IMAGE_ALPHA")]
443 InitImageAlpha,
444}
445
/// Path wrapper for the `mask_image` field of the masking request body.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` because the only field,
/// a `PathBuf`, supports both; this lets the type be used in sets and as a
/// map key. The default value wraps an empty path.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct MaskImage {
    /// Path of the mask image. This type only stores the path; reading or
    /// uploading the file happens elsewhere.
    pub path: PathBuf,
}
450
451#[derive(Debug, Default, Clone, PartialEq, Builder)]
452#[builder(name = "MaskingRequestBodyArgs")]
453#[builder(pattern = "mutable")]
454#[builder(setter(into, strip_option), default)]
455#[builder(derive(Debug))]
456#[builder(build_fn(error = "StabilityAIError"))]
457pub struct MaskingRequestBody {
458 /// Image used to initialize the diffusion process, in lieu of random noise.
459 pub init_image: InitImage,
460
461 /// For any given pixel, the mask determines the strength of generation
462 /// on a linear scale. This parameter determines where to source the
463 /// mask from:
464 ///
465 /// - `MASK_IMAGE_WHITE` will use the white pixels of the
466 /// mask_image as the mask, where white pixels are completely replaced
467 /// and black pixels are unchanged
468 ///
469 /// - `MASK_IMAGE_BLACK` will use the
470 /// black pixels of the mask_image as the mask, where black pixels are
471 /// completely replaced and white pixels are unchanged
472 ///
473 /// - `INIT_IMAGE_ALPHA` will use the alpha channel of the init_image
474 /// as the mask, where fully transparent pixels are completely replaced
475 /// and fully opaque pixels are unchanged
476 pub mask_source: MaskSource,
477
478 /// Optional grayscale mask that allows for influence over which pixels
479 /// are eligible for diffusion and at what strength. Must be the same
480 /// dimensions as the `init_image`. Use the `mask_source` option to
481 /// specify whether the white or black pixels should be inpainted.
482 pub mask_image: Option<MaskImage>,
483
484 pub text_prompts: TextPrompts,
485
486 /// How strictly the diffusion process adheres to the prompt text
487 /// (higher values keep your image closer to your prompt)
488 pub cfg_scale: Option<u8>,
489
490 pub clip_guidance_preset: Option<ClipGuidancePreset>,
491
492 /// Which sampler to use for the diffusion process.
493 /// If this value is omitted we'll automatically select
494 /// an appropriate sampler for you.
495 pub sampler: Option<Sampler>,
496
497 /// Number of images to generate
498 pub samples: Option<u8>,
499
500 /// Random noise seed (omit this option or use `0` for a random seed)
501 pub seed: Option<u32>,
502
503 /// Number of diffusion steps to run
504 pub steps: Option<u32>,
505
506 /// Pass in a style preset to guide the image model towards a particular style.
507 ///
508 /// This list of style presets is subject to change.
509 pub style_preset: Option<StylePreset>,
510
511 /// Extra parameters passed to the engine.
512 ///
513 /// These parameters are used for in-development or experimental features
514 /// and may change without warning, so please use with caution.
515 pub extras: Option<serde_json::Value>,
516}