Skip to main content

llmsdk_provider/
video_model.rs

//! Video-generation model trait and its supporting types.
//!
//! Mirrors `@ai-sdk/provider/src/video-model/v4/*`. Upstream names the v4
//! trait `Experimental_VideoModelV4`; this crate drops the `Experimental_`
//! prefix so the Rust trait surface stays uniform with the other five model
//! traits.
6// Rust guideline compliant 2026-02-21
7
8use async_trait::async_trait;
9use bytes::Bytes;
10use serde::{Deserialize, Serialize};
11
12use crate::error::Result;
13use crate::shared::{FileBytes, Headers, ProviderMetadata, ProviderOptions, Warning};
14
15/// Contract every video-generation model implements.
16#[async_trait]
17pub trait VideoModel: Send + Sync + std::fmt::Debug {
18    /// Provider id, e.g. `"xai"`.
19    fn provider(&self) -> &str;
20
21    /// Provider-specific model id, e.g. `"grok-2-video"`.
22    fn model_id(&self) -> &str;
23
24    /// Specification version (currently `"v4"`).
25    fn specification_version(&self) -> &'static str {
26        "v4"
27    }
28
29    /// Maximum videos that can be requested per call.
30    ///
31    /// Most video models only support `n=1` due to computational cost; the
32    /// default returns `Some(1)`.
33    async fn max_videos_per_call(&self) -> Option<u32> {
34        Some(1)
35    }
36
37    /// Generate videos.
38    ///
39    /// # Errors
40    ///
41    /// Returns a [`crate::ProviderError`] when the upstream call fails, the
42    /// generation job times out, or the response is malformed.
43    async fn do_generate(&self, options: VideoOptions) -> Result<VideoResult>;
44}
45
46/// Options for one [`VideoModel::do_generate`] call.
47///
48/// Mirrors `VideoModelV4CallOptions`.
49#[derive(Debug, Clone, Default, Serialize, Deserialize)]
50pub struct VideoOptions {
51    /// Text prompt for the video generation.
52    #[serde(default, skip_serializing_if = "Option::is_none")]
53    pub prompt: Option<String>,
54    /// Number of videos to generate. Default: 1.
55    #[serde(default = "default_n")]
56    pub n: u32,
57    /// Aspect ratio, formatted as `WIDTH:HEIGHT` (e.g. `"16:9"`).
58    #[serde(
59        default,
60        rename = "aspectRatio",
61        skip_serializing_if = "Option::is_none"
62    )]
63    pub aspect_ratio: Option<String>,
64    /// Resolution, formatted as `WIDTHxHEIGHT` (e.g. `"1280x720"`).
65    #[serde(default, skip_serializing_if = "Option::is_none")]
66    pub resolution: Option<String>,
67    /// Duration of the video in seconds.
68    ///
69    /// Serialized as `duration` to match upstream
70    /// `video-model-v4-call-options.ts:36` (`duration: number | undefined`).
71    /// The Rust field keeps the `_seconds` suffix for clarity at the call
72    /// site; only the JSON key differs.
73    #[serde(default, rename = "duration", skip_serializing_if = "Option::is_none")]
74    pub duration_seconds: Option<f64>,
75    /// Frames per second.
76    #[serde(default, skip_serializing_if = "Option::is_none")]
77    pub fps: Option<u32>,
78    /// Seed for deterministic generation.
79    #[serde(default, skip_serializing_if = "Option::is_none")]
80    pub seed: Option<u64>,
81    /// Source image or video for image-to-video / editing endpoints.
82    #[serde(default, skip_serializing_if = "Option::is_none")]
83    pub image: Option<VideoFile>,
84    /// Extra HTTP headers (HTTP providers only).
85    #[serde(default, skip_serializing_if = "Option::is_none")]
86    pub headers: Option<Headers>,
87    /// Provider-specific options.
88    #[serde(
89        default,
90        rename = "providerOptions",
91        skip_serializing_if = "Option::is_none"
92    )]
93    pub provider_options: Option<ProviderOptions>,
94}
95
/// Number of videos requested when the caller does not specify `n`.
const DEFAULT_VIDEO_COUNT: u32 = 1;

/// Serde default provider for the `n` field of `VideoOptions`.
fn default_n() -> u32 {
    DEFAULT_VIDEO_COUNT
}
99
100/// Input image or video for image-to-video / video editing endpoints.
101///
102/// Mirrors `VideoModelV4File`.
103#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
104#[serde(tag = "type", rename_all = "kebab-case")]
105pub enum VideoFile {
106    /// Inline file bytes.
107    File {
108        /// IANA media type (e.g. `"video/mp4"`, `"image/png"`).
109        #[serde(rename = "mediaType")]
110        media_type: String,
111        /// File bytes (raw or base64-encoded).
112        data: FileBytes,
113        /// Provider-specific options.
114        #[serde(
115            default,
116            rename = "providerOptions",
117            skip_serializing_if = "Option::is_none"
118        )]
119        provider_options: Option<ProviderOptions>,
120    },
121    /// URL pointing to the file.
122    Url {
123        /// Absolute URL.
124        url: String,
125        /// Provider-specific options.
126        #[serde(
127            default,
128            rename = "providerOptions",
129            skip_serializing_if = "Option::is_none"
130        )]
131        provider_options: Option<ProviderOptions>,
132    },
133}
134
135/// Result of [`VideoModel::do_generate`].
136///
137/// Mirrors `VideoModelV4Result`.
138#[derive(Debug, Clone)]
139pub struct VideoResult {
140    /// Generated videos.
141    pub videos: Vec<VideoData>,
142    /// Warnings for the call.
143    pub warnings: Vec<Warning>,
144    /// Provider-specific metadata.
145    pub provider_metadata: Option<ProviderMetadata>,
146    /// Response info (telemetry).
147    ///
148    /// Unlike `RequestInfo` / `ResponseInfo` reused elsewhere, this struct
149    /// pins `timestamp` and `model_id` as required fields to match the
150    /// upstream `VideoModelV4Result.response` contract (both are required
151    /// in TS).
152    pub response: VideoResponseInfo,
153}
154
155/// Response metadata for [`VideoModel::do_generate`].
156///
157/// Mirrors `VideoModelV4Result.response`. Unlike [`crate::shared::ResponseInfo`]
158/// the `timestamp` and `model_id` fields are required.
159#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
160pub struct VideoResponseInfo {
161    /// Timestamp for the start of the generated response (ISO-8601 string).
162    pub timestamp: String,
163    /// Model id reported by the provider.
164    #[serde(rename = "modelId")]
165    pub model_id: String,
166    /// Response headers.
167    #[serde(default, skip_serializing_if = "Option::is_none")]
168    pub headers: Option<Headers>,
169}
170
171/// One video returned by the provider.
172///
173/// Mirrors `VideoModelV4VideoData` (tagged union over URL / base64 / binary).
174#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
175#[serde(tag = "type", rename_all = "kebab-case")]
176pub enum VideoData {
177    /// Video available at a URL (most common for video providers).
178    Url {
179        /// Absolute URL to the video file.
180        url: String,
181        /// IANA media type (e.g. `"video/mp4"`).
182        #[serde(rename = "mediaType")]
183        media_type: String,
184    },
185    /// Video as a base64-encoded string.
186    Base64 {
187        /// Base64-encoded payload.
188        data: String,
189        /// IANA media type.
190        #[serde(rename = "mediaType")]
191        media_type: String,
192    },
193    /// Video as raw binary bytes.
194    Binary {
195        /// Raw bytes.
196        #[serde(with = "binary_serde")]
197        data: Bytes,
198        /// IANA media type.
199        #[serde(rename = "mediaType")]
200        media_type: String,
201    },
202}
203
204mod binary_serde {
205    use bytes::Bytes;
206    use serde::{Deserialize, Deserializer, Serializer};
207
208    pub fn serialize<S: Serializer>(b: &Bytes, s: S) -> Result<S::Ok, S::Error> {
209        s.serialize_bytes(b)
210    }
211
212    pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Bytes, D::Error> {
213        let v: Vec<u8> = Vec::deserialize(d)?;
214        Ok(Bytes::from(v))
215    }
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Deserializing an empty object fills `n` with 1.
    #[test]
    fn options_default_n_is_one() {
        let parsed = serde_json::from_value::<VideoOptions>(json!({})).unwrap();
        assert_eq!(parsed.n, 1);
    }

    /// Options survive a JSON round trip and use the upstream key names.
    #[test]
    fn options_roundtrip_camelcase() {
        let original = VideoOptions {
            prompt: Some(String::from("a cat")),
            n: 2,
            aspect_ratio: Some(String::from("16:9")),
            resolution: Some(String::from("1920x1080")),
            duration_seconds: Some(5.0),
            fps: Some(30),
            seed: Some(42),
            ..VideoOptions::default()
        };

        let encoded = serde_json::to_value(&original).unwrap();
        assert_eq!(encoded["aspectRatio"], "16:9");
        // Upstream `video-model-v4-call-options.ts:36` names the wire key
        // `duration` (without a `Seconds` suffix); the Rust field is called
        // `duration_seconds` purely for clarity at the call site.
        assert_eq!(encoded["duration"], 5.0);

        let decoded: VideoOptions = serde_json::from_value(encoded).unwrap();
        assert_eq!(decoded.aspect_ratio.as_deref(), Some("16:9"));
        assert_eq!(decoded.fps, Some(30));
    }

    /// The `Url` variant of `VideoFile` is tagged `"url"`.
    #[test]
    fn file_tagged_correctly() {
        let source = VideoFile::Url {
            url: String::from("https://example.com/start.png"),
            provider_options: None,
        };
        let encoded = serde_json::to_value(&source).unwrap();
        assert_eq!(encoded["type"], "url");
    }

    /// The `Url` variant of `VideoData` is tagged `"url"` and exposes
    /// `mediaType` in camelCase.
    #[test]
    fn data_tagged_correctly() {
        let video = VideoData::Url {
            url: String::from("https://example.com/x.mp4"),
            media_type: String::from("video/mp4"),
        };
        let encoded = serde_json::to_value(&video).unwrap();
        assert_eq!(encoded["type"], "url");
        assert_eq!(encoded["mediaType"], "video/mp4");
    }
}