1use serde::{Deserialize, Serialize};
2use time::OffsetDateTime;
3
/// Identifier of a [`MediaArtifact`].
///
/// This is a plain alias for `String`, so the type system does not distinguish
/// artifact ids from other strings.
pub type MediaArtifactId = String;

/// The provider id literal `"fake"`.
///
/// NOTE(review): presumably identifies a built-in fake/test provider — confirm
/// against the code that consumes this constant.
pub const FAKE_MEDIA_PROVIDER_ID: &str = "fake";
8
/// Broad category of a media artifact.
///
/// Serialized with camelCase variant names: `"image"`, `"video"`, `"audio"`,
/// `"other"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum MediaKind {
    /// Still image.
    Image,
    /// Video clip.
    Video,
    /// Audio clip.
    Audio,
    /// Anything that is not an image, video, or audio artifact.
    Other,
}
17
/// How a [`MediaPreview`] is meant to be presented.
///
/// Serialized with camelCase variant names: `"inlineImage"`, `"thumbnail"`,
/// `"metadataOnly"`.
///
/// NOTE(review): the per-variant descriptions below are inferred from the
/// variant names; the rendering code is not part of this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum MediaPreviewStrategy {
    /// Presumably: show the image itself inline.
    InlineImage,
    /// Presumably: show a thumbnail (see `MediaPreview::thumbnail_path`).
    Thumbnail,
    /// Presumably: show only textual metadata, no pixels.
    MetadataOnly,
}
25
/// Width and height of a piece of media.
///
/// NOTE(review): units are presumably pixels — nothing in this file states it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaDimensions {
    /// Horizontal extent.
    pub width: u32,
    /// Vertical extent.
    pub height: u32,
}
32
/// Provider-side details recorded about how a piece of media was generated.
///
/// Field names are camelCase on the wire. Every `Option` field is left out of
/// the serialized output when `None` and becomes `None` when absent on input;
/// `provider` is the only field required when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaGenerationMetadata {
    /// Provider identifier (the tests in this file use `"google"`).
    pub provider: String,
    /// Model identifier, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// Prompt text as revised by the provider, when one was reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub revised_prompt: Option<String>,
    /// Watermark marker for the output (the tests in this file use `"synthid"`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub watermark: Option<String>,
    /// Free-form safety information; its format is not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety: Option<String>,
    /// Identifier of the provider response this media came from, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provider_response_id: Option<String>,
}
51
/// Metadata record describing one stored piece of media.
///
/// Field names are camelCase on the wire. `Option` fields are omitted from
/// serialized output when `None` and default to `None` when absent on input.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaArtifact {
    /// Identifier of this artifact.
    pub id: MediaArtifactId,
    /// Broad media category.
    pub kind: MediaKind,
    /// MIME type string, e.g. `"image/png"` or `"video/mp4"` as used by the tests.
    pub mime_type: String,
    /// Width/height, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dimensions: Option<MediaDimensions>,
    /// Duration in milliseconds, when applicable (the tests set it for a video).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duration_millis: Option<u64>,
    /// Size in bytes (presumably of the stored payload — confirm with the writer).
    pub byte_size: u64,
    /// Provider identifier associated with this artifact.
    pub provider: String,
    /// Hash of the prompt. NOTE(review): the hash algorithm is not visible here.
    pub prompt_hash: String,
    /// Storage location, kept as a plain string (not a `PathBuf`).
    pub store_path: String,
    /// Location of a thumbnail, when one exists.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thumbnail_path: Option<String>,
    /// Generation details, when recorded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub generation: Option<MediaGenerationMetadata>,
    /// Creation timestamp, (de)serialized as an RFC 3339 string.
    #[serde(with = "time::serde::rfc3339")]
    pub created_at: OffsetDateTime,
    /// Defaults to `false` when missing from the input.
    ///
    /// NOTE(review): presumably marks files the application itself created and
    /// manages — confirm against the code that reads this flag.
    #[serde(default)]
    pub roder_owned: bool,
}
75
/// Presentation hints for one artifact.
///
/// Field names are camelCase on the wire; `Option` fields are omitted when
/// `None` and default to `None` when absent on input.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaPreview {
    /// Id of the artifact this preview refers to.
    pub artifact_id: MediaArtifactId,
    /// How the preview is meant to be presented.
    pub strategy: MediaPreviewStrategy,
    /// Location of a thumbnail, when one exists.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thumbnail_path: Option<String>,
    /// Text label (presumably shown when the media cannot be rendered — confirm).
    pub fallback_label: String,
    /// Optional warning message attached to the preview.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub warning: Option<String>,
}
87
/// An artifact packaged together with its content as a data URL string.
///
/// All three fields are required on input; names are camelCase on the wire.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaAttachment {
    /// Id of the attached artifact.
    pub artifact_id: MediaArtifactId,
    /// MIME type string of the content.
    pub mime_type: String,
    /// The content as a URL string (presumably in the `data:<mime>;base64,<payload>`
    /// form produced by `data_url` — confirm at the construction site).
    pub data_url: String,
}
95
/// Requested image operation.
///
/// Serialized with camelCase variant names: `"auto"`, `"generate"`, `"edit"`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum ImageGenerationAction {
    /// Presumably: let the provider or caller pick between generate and edit.
    Auto,
    /// Create a new image.
    Generate,
    /// Edit existing input image(s).
    Edit,
}
103
/// An image supplied inline as request input.
///
/// Both fields are required on input; names are camelCase on the wire
/// (`bytesBase64`, `mimeType`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaImageInput {
    /// Image bytes as a base64 string. This type performs no validation of it.
    pub bytes_base64: String,
    /// MIME type string of the image.
    pub mime_type: String,
}
111
/// Parameters for one media generation call.
///
/// Every field carries `#[serde(default)]`, so all of them may be absent on
/// input. Field names are camelCase on the wire. `Option` fields are omitted
/// from serialized output when `None`, and the two `Vec` fields when empty.
///
/// The string-typed tuning fields are stored as-is; this type does not
/// validate them. NOTE(review): which provider honors which field is not
/// visible in this file.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MediaGenerationRequest {
    /// Text prompt; an empty string when absent on input.
    #[serde(default)]
    pub prompt: String,
    /// Requested provider id, if any (the tests use `"openai"` and `"google"`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,
    /// Requested model id, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// Requested operation, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub action: Option<ImageGenerationAction>,
    /// Ids of existing artifacts supplied as inputs.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub input_artifacts: Vec<MediaArtifactId>,
    /// Images supplied inline as inputs.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub input_images: Vec<MediaImageInput>,
    /// Presumably the number of outputs requested — TODO confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub count: Option<u32>,
    /// Aspect ratio string (the tests use `"16:9"`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub aspect_ratio: Option<String>,
    /// Size string (the tests use `"1536x1024"`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<String>,
    /// Image size string, distinct from `size` (the tests use `"2K"`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub image_size: Option<String>,
    /// Quality setting string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quality: Option<String>,
    /// Output format string (the tests use `"png"`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,
    /// Background setting string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub background: Option<String>,
    /// Output compression level as a `u8`; the valid range is not defined here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_compression: Option<u8>,
    /// Moderation setting string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moderation: Option<String>,
    /// Presumably the number of partial (streamed) images requested — TODO confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub partial_images: Option<u32>,
    /// Output location as a plain string (the tests use `"/tmp/out.png"`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_path: Option<String>,
    /// Free-form JSON object of additional options, keyed by name.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provider_options: Option<serde_json::Map<String, serde_json::Value>>,
}
165
/// Token accounting for a generation call.
///
/// Each counter is optional: omitted from serialized output when `None` and
/// `None` when absent on input. Field names are camelCase on the wire.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaGenerationUsage {
    /// Input token count.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    /// Input token count attributed to images.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_image_tokens: Option<u64>,
    /// Output token count.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Total token count. Stored as given; this type does not compute it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub total_tokens: Option<u64>,
}
178
/// One generated output: the stored artifact plus how to preview it.
///
/// Field names are camelCase on the wire.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaGenerationOutput {
    /// The artifact record for this output.
    pub artifact: MediaArtifact,
    /// Presentation hints for the artifact.
    pub preview: MediaPreview,
    /// Revised prompt for this particular output, when reported; omitted from
    /// serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub revised_prompt: Option<String>,
}
187
188#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
189#[serde(rename_all = "camelCase")]
190pub struct MediaGenerationResponse {
191 pub provider: String,
192 #[serde(default, skip_serializing_if = "Option::is_none")]
193 pub model: Option<String>,
194 pub outputs: Vec<MediaGenerationOutput>,
195 #[serde(default, skip_serializing_if = "Option::is_none")]
196 pub revised_prompt: Option<String>,
197 #[serde(default, skip_serializing_if = "Option::is_none")]
198 pub provider_response_id: Option<String>,
199 #[serde(default, skip_serializing_if = "Option::is_none")]
200 pub usage: Option<MediaGenerationUsage>,
201 #[serde(default, skip_serializing_if = "Option::is_none")]
203 pub watermark: Option<String>,
204 #[serde(default, skip_serializing_if = "Option::is_none")]
205 pub safety: Option<String>,
206 #[serde(default, skip_serializing_if = "Vec::is_empty")]
209 pub output_errors: Vec<String>,
210}
211
212impl MediaGenerationResponse {
213 pub fn primary_artifact(&self) -> Option<&MediaArtifact> {
214 self.outputs.first().map(|output| &output.artifact)
215 }
216}
217
/// One image as returned inside an [`ImageGenerationBatch`], carried inline as
/// base64.
///
/// Field names are camelCase on the wire. `Option` fields are omitted from
/// serialized output when `None` and default to `None` when absent on input.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct GeneratedImage {
    /// Image bytes as a base64 string. This type performs no validation of it.
    pub bytes_base64: String,
    /// MIME type string of the image.
    pub mime_type: String,
    /// Width/height, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dimensions: Option<MediaDimensions>,
    /// Prompt text as revised by the provider, when reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub revised_prompt: Option<String>,
    /// Watermark marker, when reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub watermark: Option<String>,
    /// Free-form safety information; its format is not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety: Option<String>,
}
233
234#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
236#[serde(rename_all = "camelCase")]
237pub struct ImageGenerationBatch {
238 pub provider: String,
239 pub model: String,
240 pub images: Vec<GeneratedImage>,
241 #[serde(default, skip_serializing_if = "Option::is_none")]
242 pub provider_response_id: Option<String>,
243 #[serde(default, skip_serializing_if = "Option::is_none")]
244 pub usage: Option<MediaGenerationUsage>,
245 #[serde(default, skip_serializing_if = "Vec::is_empty")]
246 pub output_errors: Vec<String>,
247}
248
/// Describes one image model and the capabilities it advertises.
///
/// Field names are camelCase on the wire. `id`, `display_name`, and `provider`
/// are required on input; every boolean defaults to `false` and every list to
/// empty when absent. Empty lists are omitted from serialized output, while
/// booleans are always written.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ImageModelDescriptor {
    /// Model identifier.
    pub id: String,
    /// Human-readable model name.
    pub display_name: String,
    /// Identifier of the provider offering this model.
    pub provider: String,
    /// Whether this model is flagged as the default.
    #[serde(default)]
    pub is_default: bool,
    /// Whether this model is flagged as legacy.
    /// NOTE(review): what "legacy" implies for callers is not visible here.
    #[serde(default)]
    pub legacy: bool,
    /// Whether the model advertises image editing.
    #[serde(default)]
    pub supports_edit: bool,
    /// Whether the model advertises producing more than one output per call.
    #[serde(default)]
    pub supports_multiple_outputs: bool,
    /// Aspect ratio strings the model advertises.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub supported_aspect_ratios: Vec<String>,
    /// Size strings the model advertises.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub supported_sizes: Vec<String>,
    /// Image size strings the model advertises.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub supported_image_sizes: Vec<String>,
    /// Whether the model advertises transparent backgrounds.
    #[serde(default)]
    pub supports_transparent_background: bool,
    /// Whether the model advertises partial (streamed) images.
    #[serde(default)]
    pub supports_partial_images: bool,
}
275
/// Describes one media provider and the capabilities it advertises.
///
/// Field names are camelCase on the wire. `id` and `display_name` are required
/// on input; booleans default to `false`, `default_model` to `None`, and
/// `image_models` to empty when absent. `default_model` is omitted from
/// serialized output when `None`, and `image_models` when empty.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MediaProviderDescriptor {
    /// Provider identifier.
    pub id: String,
    /// Human-readable provider name.
    pub display_name: String,
    /// Whether the provider advertises image generation.
    #[serde(default)]
    pub supports_images: bool,
    /// Whether the provider advertises video generation.
    #[serde(default)]
    pub supports_videos: bool,
    /// Whether the provider is flagged as configured.
    /// NOTE(review): presumably means credentials/settings are present — confirm.
    #[serde(default)]
    pub configured: bool,
    /// Identifier of the provider's default model, if it has one.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_model: Option<String>,
    /// Image models offered by this provider.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub image_models: Vec<ImageModelDescriptor>,
}
293
294#[async_trait::async_trait]
298pub trait MediaGeneratorProvider: Send + Sync + 'static {
299 fn provider_id(&self) -> &str;
300
301 fn descriptor(&self) -> MediaProviderDescriptor;
302
303 async fn generate_image(
304 &self,
305 _request: MediaGenerationRequest,
306 ) -> anyhow::Result<ImageGenerationBatch> {
307 anyhow::bail!(
308 "image generation is not supported by provider {}",
309 self.provider_id()
310 )
311 }
312}
313
/// Builds a base64 data URL of the form `data:<mime_type>;base64,<bytes_base64>`.
///
/// Both arguments are inserted verbatim: nothing is encoded, escaped, or
/// validated, so `bytes_base64` must already be base64 text.
pub fn data_url(mime_type: &str, bytes_base64: &str) -> String {
    ["data:", mime_type, ";base64,", bytes_base64].concat()
}
317
// Serialization contract tests: they pin the camelCase wire names, the
// omission of empty/`None` fields, and JSON round-trips for the types above.
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixture: a 1x1 PNG artifact with a thumbnail and no generation
    // metadata, timestamped at the Unix epoch so output is deterministic.
    fn image_artifact() -> MediaArtifact {
        MediaArtifact {
            id: "media-image-1".to_string(),
            kind: MediaKind::Image,
            mime_type: "image/png".to_string(),
            dimensions: Some(MediaDimensions {
                width: 1,
                height: 1,
            }),
            duration_millis: None,
            byte_size: 67,
            provider: "fake".to_string(),
            prompt_hash: "hash".to_string(),
            store_path: "/tmp/image.png".to_string(),
            thumbnail_path: Some("/tmp/image.thumb.png".to_string()),
            generation: None,
            created_at: OffsetDateTime::UNIX_EPOCH,
            roder_owned: true,
        }
    }

    // Artifact fields serialize under camelCase keys, and a `None`
    // `generation` is left out of the JSON entirely.
    #[test]
    fn image_and_video_artifacts_serialize_as_camel_case_metadata() {
        let image = image_artifact();
        let video = MediaArtifact {
            kind: MediaKind::Video,
            mime_type: "video/mp4".to_string(),
            duration_millis: Some(1_000),
            ..image.clone()
        };

        let value = serde_json::to_value(&image).unwrap();
        assert_eq!(value["mimeType"], "image/png");
        assert_eq!(value["dimensions"]["width"], 1);
        assert_eq!(value["thumbnailPath"], "/tmp/image.thumb.png");
        assert!(value.get("generation").is_none());
        assert_eq!(serde_json::to_value(video).unwrap()["durationMillis"], 1000);
    }

    // A request with only a prompt decodes, with every other field defaulted;
    // a request that also carries `model` and `outputPath` decodes those too.
    #[test]
    fn minimum_text_to_image_request_decodes_from_legacy_arguments() {
        let request: MediaGenerationRequest =
            serde_json::from_value(serde_json::json!({ "prompt": "tiny" })).unwrap();
        assert_eq!(request.prompt, "tiny");
        assert!(request.provider.is_none());
        assert!(request.model.is_none());
        assert!(request.input_artifacts.is_empty());
        assert!(request.provider_options.is_none());

        let legacy: MediaGenerationRequest = serde_json::from_value(serde_json::json!({
            "prompt": "tiny",
            "model": "gpt-image-2",
            "outputPath": "/tmp/out.png"
        }))
        .unwrap();
        assert_eq!(legacy.model.as_deref(), Some("gpt-image-2"));
        assert_eq!(legacy.output_path.as_deref(), Some("/tmp/out.png"));
    }

    // An edit request serializes with camelCase keys, the action as `"edit"`,
    // and the empty `input_images` list omitted.
    #[test]
    fn image_edit_request_serializes_canonical_camel_case_fields() {
        let request = MediaGenerationRequest {
            prompt: "Make this screenshot look like a clean launch graphic".to_string(),
            provider: Some("openai".to_string()),
            model: Some("gpt-image-2".to_string()),
            action: Some(ImageGenerationAction::Edit),
            input_artifacts: vec!["media-image-123".to_string()],
            size: Some("1536x1024".to_string()),
            output_format: Some("png".to_string()),
            ..MediaGenerationRequest::default()
        };

        let value = serde_json::to_value(&request).unwrap();
        assert_eq!(value["provider"], "openai");
        assert_eq!(value["action"], "edit");
        assert_eq!(value["inputArtifacts"][0], "media-image-123");
        assert_eq!(value["size"], "1536x1024");
        assert_eq!(value["outputFormat"], "png");
        assert!(value.get("inputImages").is_none());
    }

    // `aspect_ratio` and `image_size` serialize as `aspectRatio` / `imageSize`.
    #[test]
    fn google_style_request_serializes_aspect_ratio_and_image_size() {
        let request = MediaGenerationRequest {
            prompt: "A polished product hero image".to_string(),
            provider: Some("google".to_string()),
            model: Some("gemini-3-pro-image".to_string()),
            aspect_ratio: Some("16:9".to_string()),
            image_size: Some("2K".to_string()),
            ..MediaGenerationRequest::default()
        };

        let value = serde_json::to_value(&request).unwrap();
        assert_eq!(value["aspectRatio"], "16:9");
        assert_eq!(value["imageSize"], "2K");
    }

    // A response with two outputs, usage, and an output error serializes under
    // the expected keys and deserializes back to an equal value.
    #[test]
    fn multi_output_response_round_trips_with_usage_and_metadata() {
        let artifact = image_artifact();
        let preview = MediaPreview {
            artifact_id: artifact.id.clone(),
            strategy: MediaPreviewStrategy::Thumbnail,
            thumbnail_path: None,
            fallback_label: "fake image/png".to_string(),
            warning: None,
        };
        let response = MediaGenerationResponse {
            provider: "openai".to_string(),
            model: Some("gpt-image-2".to_string()),
            outputs: vec![
                MediaGenerationOutput {
                    artifact: artifact.clone(),
                    preview: preview.clone(),
                    revised_prompt: Some("a tiny test image".to_string()),
                },
                MediaGenerationOutput {
                    artifact,
                    preview,
                    revised_prompt: None,
                },
            ],
            revised_prompt: Some("a tiny test image".to_string()),
            provider_response_id: Some("resp_123".to_string()),
            usage: Some(MediaGenerationUsage {
                input_tokens: Some(12),
                input_image_tokens: None,
                output_tokens: Some(4_160),
                total_tokens: Some(4_172),
            }),
            watermark: None,
            safety: None,
            output_errors: vec!["third output was rejected by moderation".to_string()],
        };

        let value = serde_json::to_value(&response).unwrap();
        assert_eq!(value["outputs"].as_array().unwrap().len(), 2);
        assert_eq!(value["outputs"][0]["revisedPrompt"], "a tiny test image");
        assert_eq!(value["providerResponseId"], "resp_123");
        assert_eq!(value["usage"]["totalTokens"], 4_172);
        assert_eq!(
            value["outputErrors"][0],
            "third output was rejected by moderation"
        );
        let round_trip: MediaGenerationResponse = serde_json::from_value(value).unwrap();
        assert_eq!(round_trip, response);
    }

    // `partialImages` and the free-form `providerOptions` object decode into
    // their typed fields.
    #[test]
    fn partial_stream_preference_and_provider_options_round_trip() {
        let request: MediaGenerationRequest = serde_json::from_value(serde_json::json!({
            "prompt": "stream me",
            "provider": "openai",
            "partialImages": 2,
            "providerOptions": { "user": "roder-tests" }
        }))
        .unwrap();
        assert_eq!(request.partial_images, Some(2));
        assert_eq!(
            request
                .provider_options
                .as_ref()
                .and_then(|options| options.get("user"))
                .and_then(|value| value.as_str()),
            Some("roder-tests")
        );
    }

    // Generation metadata nested in an artifact keeps its watermark value
    // through a serialize/deserialize cycle.
    #[test]
    fn google_generation_metadata_persists_synthid_watermark() {
        let mut artifact = image_artifact();
        artifact.generation = Some(MediaGenerationMetadata {
            provider: "google".to_string(),
            model: Some("gemini-3.1-flash-image".to_string()),
            revised_prompt: None,
            watermark: Some("synthid".to_string()),
            safety: None,
            provider_response_id: None,
        });

        let value = serde_json::to_value(&artifact).unwrap();
        assert_eq!(value["generation"]["provider"], "google");
        assert_eq!(value["generation"]["watermark"], "synthid");
        let round_trip: MediaArtifact = serde_json::from_value(value).unwrap();
        assert_eq!(
            round_trip.generation.unwrap().watermark.as_deref(),
            Some("synthid")
        );
    }

    // A batch with one image and full usage data round-trips; a `Some(0)`
    // counter is serialized (only `None` is skipped).
    #[test]
    fn openai_batch_metadata_round_trips() {
        let batch = ImageGenerationBatch {
            provider: "openai".to_string(),
            model: "gpt-image-2".to_string(),
            images: vec![GeneratedImage {
                bytes_base64: "iVBORw0KGgo=".to_string(),
                mime_type: "image/png".to_string(),
                dimensions: Some(MediaDimensions {
                    width: 1024,
                    height: 1024,
                }),
                revised_prompt: Some("a revised prompt".to_string()),
                watermark: None,
                safety: None,
            }],
            provider_response_id: Some("img_123".to_string()),
            usage: Some(MediaGenerationUsage {
                input_tokens: Some(10),
                input_image_tokens: Some(0),
                output_tokens: Some(1_056),
                total_tokens: Some(1_066),
            }),
            output_errors: Vec::new(),
        };

        let value = serde_json::to_value(&batch).unwrap();
        assert_eq!(value["images"][0]["revisedPrompt"], "a revised prompt");
        assert_eq!(value["usage"]["inputImageTokens"], 0);
        let round_trip: ImageGenerationBatch = serde_json::from_value(value).unwrap();
        assert_eq!(round_trip, batch);
    }
}
544}