stabilityai/types/
spec_types.rs

1use std::{path::PathBuf, sync::Arc};
2
3use derive_builder::Builder;
4
5use serde::{Deserialize, Serialize};
6
7use crate::error::StabilityAIError;
8
9#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
10pub struct OrganizationMembership {
11    pub id: String,
12    pub is_default: bool,
13    pub name: String,
14    pub role: String,
15}
16
17#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
18pub struct AccountResponseBody {
19    /// The user's email
20    pub email: String,
21    /// The user's ID
22    pub id: String,
23    /// The user's organizations
24    pub organizations: Vec<OrganizationMembership>,
25    /// The user's profile picture
26    pub profile_picture: Option<String>,
27}
28
29#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
30pub struct BalanceResponseBody {
31    /// The balance of the account/organization associated with the API key
32    pub credits: f64,
33}
34
35#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
36pub enum EngineType {
37    AUDIO,
38    CLASSIFICATION,
39    PICTURE,
40    STORAGE,
41    TEXT,
42    VIDEO,
43}
44
45#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
46pub struct Engine {
47    /// Unique identifier for the engine
48    pub id: String,
49    /// Name of the engine
50    pub name: String,
51    pub description: String,
52    /// The type of content this engine produces
53    pub r#type: EngineType,
54}
55
56/// Text prompt for image generation
57#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
58pub struct TextPrompt {
59    /// The prompt itself
60    pub text: String,
61    /// Weight of the prompt (use negative numbers for negative prompts)
62    #[serde(skip_serializing_if = "Option::is_none")]
63    pub weight: Option<f64>,
64}
65
66#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
67pub enum ClipGuidancePreset {
68    #[serde(rename = "FAST_BLUE")]
69    FastBlue,
70    #[serde(rename = "FAST_GREEN")]
71    FastGreen,
72    #[serde(rename = "NONE")]
73    None,
74    #[serde(rename = "SIMPLE")]
75    Simple,
76    #[serde(rename = "SLOW")]
77    Slow,
78    #[serde(rename = "SLOWER")]
79    Slower,
80    #[serde(rename = "SLOWEST")]
81    Slowest,
82}
83
84#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
85pub enum Sampler {
86    #[serde(rename = "DDIM")]
87    Ddim,
88    #[serde(rename = "DDPM")]
89    Ddpm,
90    #[serde(rename = "K_DPMPP_2M")]
91    KDpmpp2m,
92    #[serde(rename = "K_DPMPP_2S_ANCESTRAL")]
93    KDpmpp2sAncestral,
94    #[serde(rename = "K_DPM_2")]
95    KDpm2,
96    #[serde(rename = "K_DPM_2_ANCESTRAL")]
97    KDpm2Ancestral,
98    #[serde(rename = "K_EULER")]
99    KEuler,
100    #[serde(rename = "K_EULER_ANCESTRAL")]
101    KEulerAncestral,
102    #[serde(rename = "K_HEUN")]
103    KHeun,
104    #[serde(rename = "K_LMS")]
105    KLms,
106}
107
108#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
109pub enum StylePreset {
110    #[serde(rename = "3d-model")]
111    ThreeDModel,
112    #[serde(rename = "analog-film")]
113    AnalogFilm,
114    #[serde(rename = "anime")]
115    Anime,
116    #[serde(rename = "cinematic")]
117    Cinematic,
118    #[serde(rename = "comic-book")]
119    ComicBook,
120    #[serde(rename = "digital-art")]
121    DigitalArt,
122    #[serde(rename = "enhance")]
123    Enhance,
124    #[serde(rename = "fantasy-art")]
125    FantasyArt,
126    #[serde(rename = "isometric")]
127    Isometric,
128    #[serde(rename = "line-art")]
129    LineArt,
130    #[serde(rename = "low-poly")]
131    LowPoly,
132    #[serde(rename = "modeling-compound")]
133    ModelingCompound,
134    #[serde(rename = "neon-punk")]
135    NeonPunk,
136    #[serde(rename = "origami")]
137    Origami,
138    #[serde(rename = "photographic")]
139    Photographic,
140    #[serde(rename = "pixel-art")]
141    PixelArt,
142    #[serde(rename = "tile-texture")]
143    TileTexture,
144}
145
146#[derive(Debug, Deserialize, Serialize, Default, Clone, PartialEq)]
147pub struct TextPrompts {
148    pub text_prompts: Vec<TextPrompt>,
149}
150
151#[derive(Debug, Serialize, Default, Clone, PartialEq, Builder)]
152#[builder(name = "TextToImageRequestBodyArgs")]
153#[builder(pattern = "mutable")]
154#[builder(setter(into, strip_option), default)]
155#[builder(derive(Debug))]
156#[builder(build_fn(error = "StabilityAIError"))]
157pub struct TextToImageRequestBody {
158    /// An array of text prompts to use for generation.
159    ///
160    ///
161    /// Due to how arrays are represented in `multipart/form-data` requests,
162    ///  prompts must adhere to the format `text_prompts[index][text|weight]`,
163    ///
164    /// where `index` is some integer used to tie the text and weight together.
165    /// While `index` does not have to be sequential, duplicate entries
166    /// will override previous entries, so it is recommended to use sequential
167    /// indices.
168    ///
169    ///
170    /// Given a text prompt with the text `A lighthouse on a cliff` and a weight
171    /// of `0.5`, it would be represented as:
172    ///
173    /// `text_prompts[0][text]: "A lighthouse on a cliff"`
174    ///
175    /// `text_prompts[0][weight]: 0.5`
176    ///
177    ///
178    /// To add another prompt to that request simply provide the values
179    /// under a new `index`:
180    ///
181    ///
182    /// `text_prompts[0][text]: "A lighthouse on a cliff"`
183    ///
184    /// `text_prompts[0][weight]: 0.5`
185    ///
186    /// `text_prompts[1][text]: "land, ground, dirt, grass"`
187    ///
188    /// `text_prompts[1][weight]: -0.9`
189    #[serde(flatten)]
190    pub text_prompts: TextPrompts,
191
192    /// Height of the image in pixels.  Must be in increments of 64 and pass the
193    /// following validation:
194    ///
195    /// - For 512 engines: 262,144 ≤ `height * width` ≤ 1,048,576
196    ///
197    /// - For 768 engines: 589,824 ≤ `height * width` ≤ 1,048,576
198    ///
199    /// - For SDXL Beta: can be as low as 128 and as high as 896 as long as `width`
200    /// is not greater than 512. If `width` is greater than 512 then this can
201    /// be _at most_ 512.
202    ///
203    /// - For SDXL v0.9: valid dimensions are 1024x1024, 1152x896, 1216x832,
204    /// 1344x768, 1536x640, 640x1536, 768x1344, 832x1216, or 896x1152
205    ///
206    /// - For SDXL v1.0: valid dimensions are the same as SDXL v0.9
207    #[serde(skip_serializing_if = "Option::is_none")]
208    pub height: Option<u16>,
209
210    /// Width of the image in pixels.  Must be in increments of 64 and pass the
211    /// following validation:
212    ///
213    /// - For 512 engines: 262,144 ≤ `height * width` ≤ 1,048,576
214    ///
215    /// - For 768 engines: 589,824 ≤ `height * width` ≤ 1,048,576
216    ///
217    /// - For SDXL Beta: can be as low as 128 and as high as 896 as long as `height`
218    /// is not greater than 512. If `height` is greater than 512 then this can be _at most_ 512.
219    ///
220    /// - For SDXL v0.9: valid dimensions are 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640,
221    /// 640x1536, 768x1344, 832x1216, or 896x1152
222    ///
223    /// - For SDXL v1.0: valid dimensions are the same as SDXL v0.9
224    #[serde(skip_serializing_if = "Option::is_none")]
225    pub width: Option<u16>,
226
227    /// How strictly the diffusion process adheres to the prompt text
228    /// (higher values keep your image closer to your prompt)
229    #[serde(skip_serializing_if = "Option::is_none")]
230    pub cfg_scale: Option<u8>,
231
232    #[serde(skip_serializing_if = "Option::is_none")]
233    pub clip_guidance_preset: Option<ClipGuidancePreset>,
234
235    /// Which sampler to use for the diffusion process.
236    /// If this value is omitted we'll automatically select
237    /// an appropriate sampler for you.
238    #[serde(skip_serializing_if = "Option::is_none")]
239    pub sampler: Option<Sampler>,
240
241    /// Number of images to generate
242    #[serde(skip_serializing_if = "Option::is_none")]
243    pub samples: Option<u8>,
244
245    /// Random noise seed (omit this option or use `0` for a random seed)
246    #[serde(skip_serializing_if = "Option::is_none")]
247    pub seed: Option<u32>,
248
249    /// Number of diffusion steps to run
250    #[serde(skip_serializing_if = "Option::is_none")]
251    pub steps: Option<u32>,
252
253    /// Pass in a style preset to guide the image model towards a particular style.
254    ///
255    /// This list of style presets is subject to change.
256    #[serde(skip_serializing_if = "Option::is_none")]
257    pub style_preset: Option<StylePreset>,
258
259    /// Extra parameters passed to the engine.
260    ///
261    /// These parameters are used for in-development or experimental features
262    /// and may change without warning, so please use with caution.
263    #[serde(skip_serializing_if = "Option::is_none")]
264    pub extras: Option<serde_json::Value>,
265}
266
267#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
268pub enum FinishReason {
269    #[serde(rename = "CONTENT_FILTERED")]
270    ContentFiltered,
271    #[serde(rename = "ERROR")]
272    Error,
273    #[serde(rename = "SUCCESS")]
274    Success,
275}
276
277#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
278pub struct Artifacts {
279    pub artifacts: Vec<Arc<Image>>,
280}
281
282#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
283pub struct Image {
284    pub base64: String,
285    #[serde(rename = "finishReason")]
286    pub finish_reason: FinishReason,
287    pub seed: i64,
288}
289
290#[derive(Debug, Deserialize, Default, Serialize, Clone, PartialEq)]
291pub enum InitImageMode {
292    #[default]
293    #[serde(rename = "IMAGE_STRENGTH")]
294    ImageStrength,
295    #[serde(rename = "STEP_SCHEDULE")]
296    StepSchedule,
297}
298
/// Init image, referenced by its filesystem path.
///
/// The `Default` value holds an empty path.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct InitImage {
    /// Path of the image file.
    pub path: PathBuf,
}
303
304#[derive(Debug, Default, Clone, PartialEq, Builder)]
305#[builder(name = "ImageToImageRequestBodyArgs")]
306#[builder(pattern = "mutable")]
307#[builder(setter(into, strip_option), default)]
308#[builder(derive(Debug))]
309#[builder(build_fn(error = "StabilityAIError"))]
310pub struct ImageToImageRequestBody {
311    pub text_prompts: TextPrompts,
312
313    pub init_image: InitImage,
314
315    /// Whether to use `image_strength` or `step_schedule_*` to control how
316    /// much influence the `init_image` has on the result.
317    pub init_image_mode: Option<InitImageMode>,
318
319    /// How much influence the `init_image` has on the diffusion process.
320    /// Values close to `1` will yield images very similar to the `init_image`
321    ///  while values close to `0` will yield images wildly different than
322    /// the `init_image`. The behavior of this is meant to mirror DreamStudio's
323    ///  \"Image Strength\" slider.  <br/> <br/> This parameter is just an
324    /// alternate way to set `step_schedule_start`, which is done via the
325    /// calculation `1 - image_strength`. For example, passing in an Image
326    /// Strength of 35% (`0.35`) would result in a `step_schedule_start` of
327    /// `0.65`.\n"
328    pub image_strength: Option<f64>,
329
330    /// Skips a proportion of the start of the diffusion steps, allowing the
331    /// init_image to influence the final generated image.  Lower values will
332    ///  result in more influence from the init_image, while higher values will
333    ///  result in more influence from the diffusion steps.  (e.g. a value
334    /// of `0` would simply return you the init_image, where a value of `1`
335    /// would return you a completely different image.)
336    pub step_schedule_start: Option<f64>,
337
338    /// Skips a proportion of the end of the diffusion steps, allowing the
339    /// init_image to influence the final generated image.  Lower values will
340    ///  result in more influence from the init_image, while higher values will
341    /// result in more influence from the diffusion steps.
342    pub step_schedule_end: Option<f64>,
343
344    /// How strictly the diffusion process adheres to the prompt text
345    /// (higher values keep your image closer to your prompt)
346    pub cfg_scale: Option<u8>,
347
348    pub clip_guidance_preset: Option<ClipGuidancePreset>,
349
350    /// Which sampler to use for the diffusion process.
351    /// If this value is omitted we'll automatically select
352    /// an appropriate sampler for you.
353    pub sampler: Option<Sampler>,
354
355    /// Number of images to generate
356    pub samples: Option<u8>,
357
358    /// Random noise seed (omit this option or use `0` for a random seed)
359    pub seed: Option<u32>,
360
361    /// Number of diffusion steps to run
362    pub steps: Option<u32>,
363
364    /// Pass in a style preset to guide the image model towards a particular style.
365    ///
366    /// This list of style presets is subject to change.
367    pub style_preset: Option<StylePreset>,
368
369    /// Extra parameters passed to the engine.
370    ///
371    /// These parameters are used for in-development or experimental features
372    /// and may change without warning, so please use with caution.
373    pub extras: Option<serde_json::Value>,
374}
375
/// Input image for an upscale request, referenced by its filesystem path.
///
/// The `Default` value holds an empty path.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct InputImage {
    /// Path of the image file.
    pub path: PathBuf,
}
380
381#[derive(Debug, Clone, PartialEq)]
382pub enum ImageToImageUpscaleBody {
383    LatentUpscalerUpscaleRequestBody(LatentUpscalerUpscaleRequestBody),
384    RealESRGANUpscaleRequestBody(RealESRGANUpscaleRequestBody),
385}
386
387#[derive(Debug, Default, Clone, PartialEq, Builder)]
388#[builder(name = "RealESRGANUpscaleRequestBodyArgs")]
389#[builder(pattern = "mutable")]
390#[builder(setter(into, strip_option), default)]
391#[builder(derive(Debug))]
392#[builder(build_fn(error = "StabilityAIError"))]
393pub struct RealESRGANUpscaleRequestBody {
394    pub image: InputImage,
395
396    /// Desired height of the output image.
397    /// Only one of `width` or `height` may be specified.
398    pub height: Option<u16>,
399
400    /// Desired width of the output image.
401    /// Only one of `width` or `height` may be specified.
402    pub width: Option<u16>,
403}
404
405#[derive(Debug, Default, Clone, PartialEq, Builder)]
406#[builder(name = "LatentUpscalerUpscaleRequestBodyArgs")]
407#[builder(pattern = "mutable")]
408#[builder(setter(into, strip_option), default)]
409#[builder(derive(Debug))]
410#[builder(build_fn(error = "StabilityAIError"))]
411pub struct LatentUpscalerUpscaleRequestBody {
412    pub image: InputImage,
413
414    pub text_prompts: Option<TextPrompts>,
415
416    /// Desired height of the output image.
417    /// Only one of `width` or `height` may be specified.
418    pub height: Option<u16>,
419
420    /// Desired width of the output image.
421    /// Only one of `width` or `height` may be specified.
422    pub width: Option<u16>,
423
424    /// How strictly the diffusion process adheres to the prompt text
425    /// (higher values keep your image closer to your prompt)
426    pub cfg_scale: Option<u8>,
427
428    /// Random noise seed (omit this option or use `0` for a random seed)
429    pub seed: Option<u32>,
430
431    /// Number of diffusion steps to run
432    pub steps: Option<u32>,
433}
434
435#[derive(Debug, Deserialize, Default, Serialize, Clone, PartialEq)]
436pub enum MaskSource {
437    #[default]
438    #[serde(rename = "MASK_IMAGE_BLACK")]
439    MaskImageBlack,
440    #[serde(rename = "MASK_IMAGE_WHITE")]
441    MaskImageWhite,
442    #[serde(rename = "INIT_IMAGE_ALPHA")]
443    InitImageAlpha,
444}
445
/// Mask image, referenced by its filesystem path.
///
/// The `Default` value holds an empty path.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct MaskImage {
    /// Path of the image file.
    pub path: PathBuf,
}
450
451#[derive(Debug, Default, Clone, PartialEq, Builder)]
452#[builder(name = "MaskingRequestBodyArgs")]
453#[builder(pattern = "mutable")]
454#[builder(setter(into, strip_option), default)]
455#[builder(derive(Debug))]
456#[builder(build_fn(error = "StabilityAIError"))]
457pub struct MaskingRequestBody {
458    /// Image used to initialize the diffusion process, in lieu of random noise.
459    pub init_image: InitImage,
460
461    /// For any given pixel, the mask determines the strength of generation
462    /// on a linear scale.  This parameter determines where to source the
463    /// mask from:
464    ///
465    /// - `MASK_IMAGE_WHITE` will use the white pixels of the
466    /// mask_image as the mask, where white pixels are completely replaced
467    /// and black pixels are unchanged
468    ///
469    /// - `MASK_IMAGE_BLACK` will use the
470    /// black pixels of the mask_image as the mask, where black pixels are
471    /// completely replaced and white pixels are unchanged
472    ///
473    /// - `INIT_IMAGE_ALPHA` will use the alpha channel of the init_image
474    /// as the mask, where fully transparent pixels are completely replaced
475    /// and fully opaque pixels are unchanged
476    pub mask_source: MaskSource,
477
478    /// Optional grayscale mask that allows for influence over which pixels
479    /// are eligible for diffusion and at what strength. Must be the same
480    /// dimensions as the `init_image`. Use the `mask_source` option to
481    /// specify whether the white or black pixels should be inpainted.
482    pub mask_image: Option<MaskImage>,
483
484    pub text_prompts: TextPrompts,
485
486    /// How strictly the diffusion process adheres to the prompt text
487    /// (higher values keep your image closer to your prompt)
488    pub cfg_scale: Option<u8>,
489
490    pub clip_guidance_preset: Option<ClipGuidancePreset>,
491
492    /// Which sampler to use for the diffusion process.
493    /// If this value is omitted we'll automatically select
494    /// an appropriate sampler for you.
495    pub sampler: Option<Sampler>,
496
497    /// Number of images to generate
498    pub samples: Option<u8>,
499
500    /// Random noise seed (omit this option or use `0` for a random seed)
501    pub seed: Option<u32>,
502
503    /// Number of diffusion steps to run
504    pub steps: Option<u32>,
505
506    /// Pass in a style preset to guide the image model towards a particular style.
507    ///
508    /// This list of style presets is subject to change.
509    pub style_preset: Option<StylePreset>,
510
511    /// Extra parameters passed to the engine.
512    ///
513    /// These parameters are used for in-development or experimental features
514    /// and may change without warning, so please use with caution.
515    pub extras: Option<serde_json::Value>,
516}