Skip to main content

gproxy_protocol/transform/openai/create_image/gemini/
utils.rs

1use crate::gemini::count_tokens::types as gt;
2use crate::gemini::generate_content::response::ResponseBody as GeminiGenerateContentResponseBody;
3use crate::gemini::generate_content::types as gct;
4use crate::gemini::types::GeminiResponseHeaders;
5use crate::openai::create_image::types as it;
6use crate::openai::create_image_edit::types as iet;
7use crate::openai::types::OpenAiResponseHeaders;
8use crate::transform::utils::TransformError;
9
10#[derive(Debug, Clone, PartialEq)]
11pub struct GeminiGeneratedImageOutput {
12    pub image: it::OpenAiGeneratedImage,
13    pub output_format: Option<it::OpenAiImageOutputFormat>,
14}
15
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct GeminiInlineImageOutput {
18    pub candidate_index: u32,
19    pub part_index: usize,
20    pub b64_json: String,
21    pub output_format: it::OpenAiImageOutputFormat,
22}
23
24fn gemini_image_config(aspect_ratio: &str) -> gt::GeminiImageConfig {
25    gt::GeminiImageConfig {
26        aspect_ratio: Some(aspect_ratio.to_string()),
27        image_size: Some("1K".to_string()),
28    }
29}
30
31pub(crate) fn gemini_image_config_from_create_image_size(
32    size: Option<it::OpenAiImageSize>,
33) -> Result<Option<gt::GeminiImageConfig>, TransformError> {
34    match size {
35        Some(it::OpenAiImageSize::Auto) | None => Ok(None),
36        Some(it::OpenAiImageSize::S1024x1024) => Ok(Some(gemini_image_config("1:1"))),
37        Some(it::OpenAiImageSize::S1536x1024) => Ok(Some(gemini_image_config("3:2"))),
38        Some(it::OpenAiImageSize::S1024x1536) => Ok(Some(gemini_image_config("2:3"))),
39        Some(
40            it::OpenAiImageSize::S256x256
41            | it::OpenAiImageSize::S512x512
42            | it::OpenAiImageSize::S1792x1024
43            | it::OpenAiImageSize::S1024x1792,
44        ) => Err(TransformError::not_implemented(
45            "cannot convert OpenAI image request with unsupported size to Gemini generateContent request",
46        )),
47    }
48}
49
50pub(crate) fn gemini_image_config_from_create_image_edit_size(
51    size: Option<iet::OpenAiImageEditSize>,
52) -> Option<gt::GeminiImageConfig> {
53    match size {
54        Some(iet::OpenAiImageEditSize::Auto) | None => None,
55        Some(iet::OpenAiImageEditSize::S1024x1024) => Some(gemini_image_config("1:1")),
56        Some(iet::OpenAiImageEditSize::S1536x1024) => Some(gemini_image_config("3:2")),
57        Some(iet::OpenAiImageEditSize::S1024x1536) => Some(gemini_image_config("2:3")),
58    }
59}
60
61fn parse_base64_data_url(value: &str) -> Result<gt::GeminiBlob, TransformError> {
62    let payload = value.strip_prefix("data:").ok_or(TransformError::not_implemented(
63        "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
64    ))?;
65    let (metadata, data) = payload.split_once(',').ok_or(TransformError::not_implemented(
66        "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
67    ))?;
68    let mime_type = metadata
69        .strip_suffix(";base64")
70        .ok_or(TransformError::not_implemented(
71            "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
72        ))?;
73
74    if mime_type.is_empty() || data.is_empty() {
75        return Err(TransformError::not_implemented(
76            "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
77        ));
78    }
79
80    Ok(gt::GeminiBlob {
81        mime_type: mime_type.to_string(),
82        data: data.to_string(),
83    })
84}
85
86pub(crate) fn gemini_part_from_openai_edit_input_image(
87    image: iet::OpenAiImageEditInputImage,
88) -> Result<gt::GeminiPart, TransformError> {
89    if image.file_id.is_some() {
90        return Err(TransformError::not_implemented(
91            "cannot convert OpenAI image edit request with file_id input image to Gemini generateContent request",
92        ));
93    }
94
95    let image_url = image.image_url.ok_or(TransformError::not_implemented(
96        "cannot convert OpenAI image edit request without image_url input image to Gemini generateContent request",
97    ))?;
98
99    if image_url.is_empty() {
100        return Err(TransformError::not_implemented(
101            "cannot convert OpenAI image edit request without image_url input image to Gemini generateContent request",
102        ));
103    }
104
105    if image_url.starts_with("data:") {
106        return Ok(gt::GeminiPart {
107            inline_data: Some(parse_base64_data_url(&image_url)?),
108            ..gt::GeminiPart::default()
109        });
110    }
111
112    Ok(gt::GeminiPart {
113        file_data: Some(gt::GeminiFileData {
114            mime_type: None,
115            file_uri: image_url,
116        }),
117        ..gt::GeminiPart::default()
118    })
119}
120
121pub(crate) fn openai_response_headers_from_gemini(
122    headers: GeminiResponseHeaders,
123) -> OpenAiResponseHeaders {
124    OpenAiResponseHeaders {
125        extra: headers.extra,
126    }
127}
128
129pub(crate) fn openai_output_format_from_mime(
130    mime_type: &str,
131) -> Option<it::OpenAiImageOutputFormat> {
132    match mime_type.to_ascii_lowercase().as_str() {
133        "image/png" => Some(it::OpenAiImageOutputFormat::Png),
134        "image/jpeg" | "image/jpg" => Some(it::OpenAiImageOutputFormat::Jpeg),
135        "image/webp" => Some(it::OpenAiImageOutputFormat::Webp),
136        _ => None,
137    }
138}
139
140pub(crate) fn gemini_generated_image_outputs_from_response(
141    body: &GeminiGenerateContentResponseBody,
142) -> Vec<GeminiGeneratedImageOutput> {
143    let mut outputs = Vec::new();
144
145    let Some(candidates) = body.candidates.as_ref() else {
146        return outputs;
147    };
148
149    for candidate in candidates {
150        let Some(content) = candidate.content.as_ref() else {
151            continue;
152        };
153
154        for part in &content.parts {
155            if let Some(inline_data) = part.inline_data.as_ref()
156                && inline_data.mime_type.starts_with("image/")
157                && !inline_data.data.is_empty()
158            {
159                outputs.push(GeminiGeneratedImageOutput {
160                    image: it::OpenAiGeneratedImage {
161                        b64_json: Some(inline_data.data.clone()),
162                        revised_prompt: None,
163                        url: None,
164                    },
165                    output_format: openai_output_format_from_mime(&inline_data.mime_type),
166                });
167            }
168
169            if let Some(file_data) = part.file_data.as_ref()
170                && !file_data.file_uri.is_empty()
171                && file_data
172                    .mime_type
173                    .as_deref()
174                    .is_none_or(|mime_type| mime_type.starts_with("image/"))
175            {
176                outputs.push(GeminiGeneratedImageOutput {
177                    image: it::OpenAiGeneratedImage {
178                        b64_json: None,
179                        revised_prompt: None,
180                        url: Some(file_data.file_uri.clone()),
181                    },
182                    output_format: file_data
183                        .mime_type
184                        .as_deref()
185                        .and_then(openai_output_format_from_mime),
186                });
187            }
188        }
189    }
190
191    outputs
192}
193
194pub fn gemini_inline_image_outputs_from_response(
195    body: &GeminiGenerateContentResponseBody,
196) -> Vec<GeminiInlineImageOutput> {
197    let mut outputs = Vec::new();
198
199    let Some(candidates) = body.candidates.as_ref() else {
200        return outputs;
201    };
202
203    for (candidate_pos, candidate) in candidates.iter().enumerate() {
204        let Some(content) = candidate.content.as_ref() else {
205            continue;
206        };
207        let candidate_index = candidate.index.unwrap_or(candidate_pos as u32);
208
209        for (part_index, part) in content.parts.iter().enumerate() {
210            let Some(inline_data) = part.inline_data.as_ref() else {
211                continue;
212            };
213            if !inline_data.mime_type.starts_with("image/") || inline_data.data.is_empty() {
214                continue;
215            }
216            let Some(output_format) = openai_output_format_from_mime(&inline_data.mime_type) else {
217                continue;
218            };
219            outputs.push(GeminiInlineImageOutput {
220                candidate_index,
221                part_index,
222                b64_json: inline_data.data.clone(),
223                output_format,
224            });
225        }
226    }
227
228    outputs
229}
230
231fn modality_token_count(
232    details: Option<&Vec<gt::GeminiModalityTokenCount>>,
233    modality: gt::GeminiModality,
234) -> u64 {
235    details
236        .into_iter()
237        .flat_map(|details| details.iter())
238        .filter(|detail| detail.modality == modality)
239        .map(|detail| detail.token_count)
240        .sum()
241}
242
243pub(crate) fn openai_image_usage_from_gemini(
244    usage: Option<&gct::GeminiUsageMetadata>,
245) -> Option<it::OpenAiImageUsage> {
246    let usage = usage?;
247
248    let input_details = it::OpenAiImageTokenDetails {
249        image_tokens: modality_token_count(
250            usage.prompt_tokens_details.as_ref(),
251            gt::GeminiModality::Image,
252        ),
253        text_tokens: modality_token_count(
254            usage.prompt_tokens_details.as_ref(),
255            gt::GeminiModality::Text,
256        ),
257    };
258    let output_details = it::OpenAiImageTokenDetails {
259        image_tokens: modality_token_count(
260            usage.candidates_tokens_details.as_ref(),
261            gt::GeminiModality::Image,
262        ),
263        text_tokens: modality_token_count(
264            usage.candidates_tokens_details.as_ref(),
265            gt::GeminiModality::Text,
266        ),
267    };
268
269    let input_tokens = usage
270        .prompt_token_count
271        .unwrap_or(input_details.image_tokens + input_details.text_tokens);
272    let output_tokens = usage
273        .candidates_token_count
274        .unwrap_or(output_details.image_tokens + output_details.text_tokens);
275
276    Some(it::OpenAiImageUsage {
277        input_tokens,
278        input_tokens_details: input_details,
279        output_tokens,
280        total_tokens: usage
281            .total_token_count
282            .unwrap_or(input_tokens.saturating_add(output_tokens)),
283        output_tokens_details: usage
284            .candidates_tokens_details
285            .as_ref()
286            .map(|_| output_details),
287    })
288}
289
290pub fn best_effort_openai_image_usage_from_gemini(
291    usage: Option<&gct::GeminiUsageMetadata>,
292) -> it::OpenAiImageUsage {
293    openai_image_usage_from_gemini(usage).unwrap_or(it::OpenAiImageUsage {
294        input_tokens: 0,
295        input_tokens_details: it::OpenAiImageTokenDetails {
296            image_tokens: 0,
297            text_tokens: 0,
298        },
299        output_tokens: 0,
300        total_tokens: 0,
301        output_tokens_details: None,
302    })
303}
304
305pub(crate) fn create_image_response_body_from_gemini_response(
306    body: GeminiGenerateContentResponseBody,
307) -> Result<it::OpenAiCreateImageResponseBody, TransformError> {
308    let outputs = gemini_generated_image_outputs_from_response(&body);
309    if outputs.is_empty() {
310        return Err(TransformError::not_implemented(
311            "cannot convert Gemini generateContent response without image output to OpenAI create image response",
312        ));
313    }
314
315    let mut data = Vec::with_capacity(outputs.len());
316    let mut common_output_format: Option<Option<it::OpenAiImageOutputFormat>> = None;
317    let mut same_output_format = true;
318
319    for output in outputs {
320        if let Some(existing) = common_output_format.as_ref() {
321            if *existing != output.output_format {
322                same_output_format = false;
323            }
324        } else {
325            common_output_format = Some(output.output_format.clone());
326        }
327        data.push(output.image);
328    }
329
330    Ok(it::OpenAiCreateImageResponseBody {
331        created: 0,
332        background: None,
333        data: Some(data),
334        output_format: if same_output_format {
335            common_output_format.flatten()
336        } else {
337            None
338        },
339        quality: None,
340        size: None,
341        usage: openai_image_usage_from_gemini(body.usage_metadata.as_ref()),
342    })
343}