Skip to main content

car_inference/tasks/
generate_image.rs

1use serde::{Deserialize, Serialize};
2
/// Fallback value for [`GenerateImageRequest::format`].
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// that field and called from the manual `Default` impl, so both paths
/// agree on `"png"`.
fn default_image_format() -> String {
    String::from("png")
}
6
7/// Request to generate an image from text.
8///
9/// `Default` is implemented manually (not derived) because `format`
10/// has a sensible non-empty default (`"png"`) that
11/// `String::default()` would clobber to `""`. Callers can
12/// `GenerateImageRequest { prompt: "...".into(), ..Default::default() }`
13/// and get a usable shape. Mirrors the `GenerateRequest` ergonomic
14/// fix from #109.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct GenerateImageRequest {
17    pub prompt: String,
18    #[serde(default)]
19    pub model: Option<String>,
20    #[serde(default)]
21    pub negative_prompt: Option<String>,
22    #[serde(default)]
23    pub width: Option<u32>,
24    #[serde(default)]
25    pub height: Option<u32>,
26    #[serde(default)]
27    pub steps: Option<u32>,
28    #[serde(default)]
29    pub guidance: Option<f32>,
30    #[serde(default)]
31    pub seed: Option<u64>,
32    #[serde(default)]
33    pub output_path: Option<String>,
34    #[serde(default = "default_image_format")]
35    pub format: String,
36
37    /// Input image to transform (img2img). Backends that don't
38    /// support image conditioning return an
39    /// `InferenceError::UnsupportedMode` if this is set.
40    #[serde(default)]
41    pub input_image_path: Option<String>,
42
43    /// Number of variants to generate in one call. `None` (or 1)
44    /// produces a single image — matches the original
45    /// `generate_image` contract. Backends that don't support
46    /// batching either loop internally or error if `> 1`. Honoured
47    /// by [`crate::InferenceEngine::generate_image_batch`]; the
48    /// scalar [`crate::InferenceEngine::generate_image`] forces it
49    /// to 1.
50    #[serde(default)]
51    pub variant_count: Option<u32>,
52
53    /// Anchor for cross-call continuity — instructs the model to
54    /// generate variants in the visual neighbourhood of this image.
55    /// Currently meaningful only on backends that natively support
56    /// the concept (e.g. gpt-image-2). Other backends ignore the
57    /// hint or return `UnsupportedMode`.
58    #[serde(default)]
59    pub parent_image_path: Option<String>,
60}
61
62impl Default for GenerateImageRequest {
63    fn default() -> Self {
64        Self {
65            prompt: String::new(),
66            model: None,
67            negative_prompt: None,
68            width: None,
69            height: None,
70            steps: None,
71            guidance: None,
72            seed: None,
73            output_path: None,
74            format: default_image_format(),
75            input_image_path: None,
76            variant_count: None,
77            parent_image_path: None,
78        }
79    }
80}
81
82/// Image generation result.
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct GenerateImageResult {
85    pub image_path: String,
86    pub media_type: String,
87    #[serde(default, skip_serializing_if = "Option::is_none")]
88    pub model_used: Option<String>,
89}