Skip to main content

llmsdk_provider/
image_model.rs

1//! Image generation model trait and supporting types.
2//!
3//! Mirrors `@ai-sdk/provider/src/image-model/v4/*`.
4// Rust guideline compliant 2026-02-21
5
6use async_trait::async_trait;
7use bytes::Bytes;
8use serde::{Deserialize, Serialize};
9
10use crate::error::Result;
11use crate::language_model::FilePart;
12use crate::shared::{
13    Headers, ProviderMetadata, ProviderOptions, RequestInfo, ResponseInfo, Warning,
14};
15
16/// Contract every image-generation model implements.
17#[async_trait]
18pub trait ImageModel: Send + Sync + std::fmt::Debug {
19    /// Provider id, e.g. `"openai"`.
20    fn provider(&self) -> &str;
21
22    /// Provider-specific model id, e.g. `"dall-e-3"`.
23    fn model_id(&self) -> &str;
24
25    /// Specification version (currently `"v4"`).
26    ///
27    /// Mirrors `ImageModelV4.specificationVersion` (ai-sdk
28    /// `image-model-v4.ts`). Provider impls inherit the default.
29    fn specification_version(&self) -> &'static str {
30        "v4"
31    }
32
33    /// Maximum images that can be requested per call.
34    async fn max_images_per_call(&self) -> Option<u32> {
35        None
36    }
37
38    /// Generate images.
39    ///
40    /// # Errors
41    ///
42    /// Returns a [`crate::ProviderError`] when the upstream call fails or
43    /// the response is malformed.
44    async fn do_generate(&self, options: ImageOptions) -> Result<ImageResult>;
45}
46
47/// Options for one [`ImageModel::do_generate`] call.
48#[derive(Debug, Clone, Default, Serialize, Deserialize)]
49pub struct ImageOptions {
50    /// Prompt describing the desired image.
51    pub prompt: String,
52    /// Number of images to generate.
53    #[serde(default, skip_serializing_if = "Option::is_none")]
54    pub n: Option<u32>,
55    /// Size, formatted as `WIDTHxHEIGHT` (e.g. `"1024x1024"`).
56    #[serde(default, skip_serializing_if = "Option::is_none")]
57    pub size: Option<String>,
58    /// Aspect ratio (e.g. `"16:9"`).
59    #[serde(
60        default,
61        rename = "aspectRatio",
62        skip_serializing_if = "Option::is_none"
63    )]
64    pub aspect_ratio: Option<String>,
65    /// Random seed for deterministic generation.
66    #[serde(default, skip_serializing_if = "Option::is_none")]
67    pub seed: Option<u64>,
68    /// Source images for editing / variation endpoints.
69    ///
70    /// Plain `do_generate` (text → image) ignores this field. Edit / variation
71    /// endpoints take the first entry as the source; `OpenAI`'s edit endpoint
72    /// accepts multiple files.
73    #[serde(default, skip_serializing_if = "Option::is_none")]
74    pub files: Option<Vec<FilePart>>,
75    /// Optional mask for image edits (transparent regions = areas to edit).
76    #[serde(default, skip_serializing_if = "Option::is_none")]
77    pub mask: Option<FilePart>,
78    /// Extra HTTP headers (HTTP providers only).
79    #[serde(default, skip_serializing_if = "Option::is_none")]
80    pub headers: Option<Headers>,
81    /// Provider-specific options.
82    #[serde(
83        default,
84        rename = "providerOptions",
85        skip_serializing_if = "Option::is_none"
86    )]
87    pub provider_options: Option<ProviderOptions>,
88}
89
90/// Result of [`ImageModel::do_generate`].
91#[derive(Debug, Clone)]
92pub struct ImageResult {
93    /// Generated images.
94    pub images: Vec<GeneratedImage>,
95    /// Warnings for the call, e.g. unsupported settings coerced away.
96    pub warnings: Vec<Warning>,
97    /// Token usage if reported by the provider (e.g. `OpenAI` `gpt-image-1`).
98    pub usage: Option<ImageUsage>,
99    /// Provider-specific metadata.
100    pub provider_metadata: Option<ProviderMetadata>,
101    /// Request info (telemetry).
102    pub request: Option<RequestInfo>,
103    /// Response info (telemetry).
104    pub response: Option<ResponseInfo>,
105}
106
107/// Token usage reported by an image-generation model.
108///
109/// Mirrors `OpenAI`'s `gpt-image-1` response shape; other providers populate the
110/// fields they support and leave the rest `None`.
111#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
112pub struct ImageUsage {
113    /// Total input tokens consumed.
114    #[serde(default, skip_serializing_if = "Option::is_none")]
115    pub input_tokens: Option<u64>,
116    /// Total output tokens emitted.
117    #[serde(default, skip_serializing_if = "Option::is_none")]
118    pub output_tokens: Option<u64>,
119    /// Breakdown of input tokens by modality.
120    #[serde(default, skip_serializing_if = "Option::is_none")]
121    pub input_tokens_details: Option<ImageUsageInputDetails>,
122}
123
124/// Input-token breakdown by modality.
125#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
126pub struct ImageUsageInputDetails {
127    /// Text tokens in the prompt.
128    #[serde(default, skip_serializing_if = "Option::is_none")]
129    pub text_tokens: Option<u64>,
130    /// Image tokens (edits / variations source).
131    #[serde(default, skip_serializing_if = "Option::is_none")]
132    pub image_tokens: Option<u64>,
133}
134
135/// One image returned by the provider.
136#[derive(Debug, Clone)]
137pub struct GeneratedImage {
138    /// Image bytes (typically PNG / JPEG).
139    pub bytes: Bytes,
140    /// IANA media type.
141    pub media_type: String,
142}