gproxy_protocol/transform/openai/create_image/gemini/
utils.rs1use crate::gemini::count_tokens::types as gt;
2use crate::gemini::generate_content::response::ResponseBody as GeminiGenerateContentResponseBody;
3use crate::gemini::generate_content::types as gct;
4use crate::gemini::types::GeminiResponseHeaders;
5use crate::openai::create_image::types as it;
6use crate::openai::create_image_edit::types as iet;
7use crate::openai::types::OpenAiResponseHeaders;
8use crate::transform::utils::TransformError;
9
10#[derive(Debug, Clone, PartialEq)]
11pub struct GeminiGeneratedImageOutput {
12 pub image: it::OpenAiGeneratedImage,
13 pub output_format: Option<it::OpenAiImageOutputFormat>,
14}
15
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct GeminiInlineImageOutput {
18 pub candidate_index: u32,
19 pub part_index: usize,
20 pub b64_json: String,
21 pub output_format: it::OpenAiImageOutputFormat,
22}
23
24fn gemini_image_config(aspect_ratio: &str) -> gt::GeminiImageConfig {
25 gt::GeminiImageConfig {
26 aspect_ratio: Some(aspect_ratio.to_string()),
27 image_size: Some("1K".to_string()),
28 }
29}
30
31pub(crate) fn gemini_image_config_from_create_image_size(
32 size: Option<it::OpenAiImageSize>,
33) -> Result<Option<gt::GeminiImageConfig>, TransformError> {
34 match size {
35 Some(it::OpenAiImageSize::Auto) | None => Ok(None),
36 Some(it::OpenAiImageSize::S1024x1024) => Ok(Some(gemini_image_config("1:1"))),
37 Some(it::OpenAiImageSize::S1536x1024) => Ok(Some(gemini_image_config("3:2"))),
38 Some(it::OpenAiImageSize::S1024x1536) => Ok(Some(gemini_image_config("2:3"))),
39 Some(
40 it::OpenAiImageSize::S256x256
41 | it::OpenAiImageSize::S512x512
42 | it::OpenAiImageSize::S1792x1024
43 | it::OpenAiImageSize::S1024x1792,
44 ) => Err(TransformError::not_implemented(
45 "cannot convert OpenAI image request with unsupported size to Gemini generateContent request",
46 )),
47 }
48}
49
50pub(crate) fn gemini_image_config_from_create_image_edit_size(
51 size: Option<iet::OpenAiImageEditSize>,
52) -> Option<gt::GeminiImageConfig> {
53 match size {
54 Some(iet::OpenAiImageEditSize::Auto) | None => None,
55 Some(iet::OpenAiImageEditSize::S1024x1024) => Some(gemini_image_config("1:1")),
56 Some(iet::OpenAiImageEditSize::S1536x1024) => Some(gemini_image_config("3:2")),
57 Some(iet::OpenAiImageEditSize::S1024x1536) => Some(gemini_image_config("2:3")),
58 }
59}
60
61fn parse_base64_data_url(value: &str) -> Result<gt::GeminiBlob, TransformError> {
62 let payload = value.strip_prefix("data:").ok_or(TransformError::not_implemented(
63 "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
64 ))?;
65 let (metadata, data) = payload.split_once(',').ok_or(TransformError::not_implemented(
66 "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
67 ))?;
68 let mime_type = metadata
69 .strip_suffix(";base64")
70 .ok_or(TransformError::not_implemented(
71 "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
72 ))?;
73
74 if mime_type.is_empty() || data.is_empty() {
75 return Err(TransformError::not_implemented(
76 "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request",
77 ));
78 }
79
80 Ok(gt::GeminiBlob {
81 mime_type: mime_type.to_string(),
82 data: data.to_string(),
83 })
84}
85
86pub(crate) fn gemini_part_from_openai_edit_input_image(
87 image: iet::OpenAiImageEditInputImage,
88) -> Result<gt::GeminiPart, TransformError> {
89 if image.file_id.is_some() {
90 return Err(TransformError::not_implemented(
91 "cannot convert OpenAI image edit request with file_id input image to Gemini generateContent request",
92 ));
93 }
94
95 let image_url = image.image_url.ok_or(TransformError::not_implemented(
96 "cannot convert OpenAI image edit request without image_url input image to Gemini generateContent request",
97 ))?;
98
99 if image_url.is_empty() {
100 return Err(TransformError::not_implemented(
101 "cannot convert OpenAI image edit request without image_url input image to Gemini generateContent request",
102 ));
103 }
104
105 if image_url.starts_with("data:") {
106 return Ok(gt::GeminiPart {
107 inline_data: Some(parse_base64_data_url(&image_url)?),
108 ..gt::GeminiPart::default()
109 });
110 }
111
112 Ok(gt::GeminiPart {
113 file_data: Some(gt::GeminiFileData {
114 mime_type: None,
115 file_uri: image_url,
116 }),
117 ..gt::GeminiPart::default()
118 })
119}
120
121pub(crate) fn openai_response_headers_from_gemini(
122 headers: GeminiResponseHeaders,
123) -> OpenAiResponseHeaders {
124 OpenAiResponseHeaders {
125 extra: headers.extra,
126 }
127}
128
129pub(crate) fn openai_output_format_from_mime(
130 mime_type: &str,
131) -> Option<it::OpenAiImageOutputFormat> {
132 match mime_type.to_ascii_lowercase().as_str() {
133 "image/png" => Some(it::OpenAiImageOutputFormat::Png),
134 "image/jpeg" | "image/jpg" => Some(it::OpenAiImageOutputFormat::Jpeg),
135 "image/webp" => Some(it::OpenAiImageOutputFormat::Webp),
136 _ => None,
137 }
138}
139
140pub(crate) fn gemini_generated_image_outputs_from_response(
141 body: &GeminiGenerateContentResponseBody,
142) -> Vec<GeminiGeneratedImageOutput> {
143 let mut outputs = Vec::new();
144
145 let Some(candidates) = body.candidates.as_ref() else {
146 return outputs;
147 };
148
149 for candidate in candidates {
150 let Some(content) = candidate.content.as_ref() else {
151 continue;
152 };
153
154 for part in &content.parts {
155 if let Some(inline_data) = part.inline_data.as_ref()
156 && inline_data.mime_type.starts_with("image/")
157 && !inline_data.data.is_empty()
158 {
159 outputs.push(GeminiGeneratedImageOutput {
160 image: it::OpenAiGeneratedImage {
161 b64_json: Some(inline_data.data.clone()),
162 revised_prompt: None,
163 url: None,
164 },
165 output_format: openai_output_format_from_mime(&inline_data.mime_type),
166 });
167 }
168
169 if let Some(file_data) = part.file_data.as_ref()
170 && !file_data.file_uri.is_empty()
171 && file_data
172 .mime_type
173 .as_deref()
174 .is_none_or(|mime_type| mime_type.starts_with("image/"))
175 {
176 outputs.push(GeminiGeneratedImageOutput {
177 image: it::OpenAiGeneratedImage {
178 b64_json: None,
179 revised_prompt: None,
180 url: Some(file_data.file_uri.clone()),
181 },
182 output_format: file_data
183 .mime_type
184 .as_deref()
185 .and_then(openai_output_format_from_mime),
186 });
187 }
188 }
189 }
190
191 outputs
192}
193
194pub fn gemini_inline_image_outputs_from_response(
195 body: &GeminiGenerateContentResponseBody,
196) -> Vec<GeminiInlineImageOutput> {
197 let mut outputs = Vec::new();
198
199 let Some(candidates) = body.candidates.as_ref() else {
200 return outputs;
201 };
202
203 for (candidate_pos, candidate) in candidates.iter().enumerate() {
204 let Some(content) = candidate.content.as_ref() else {
205 continue;
206 };
207 let candidate_index = candidate.index.unwrap_or(candidate_pos as u32);
208
209 for (part_index, part) in content.parts.iter().enumerate() {
210 let Some(inline_data) = part.inline_data.as_ref() else {
211 continue;
212 };
213 if !inline_data.mime_type.starts_with("image/") || inline_data.data.is_empty() {
214 continue;
215 }
216 let Some(output_format) = openai_output_format_from_mime(&inline_data.mime_type) else {
217 continue;
218 };
219 outputs.push(GeminiInlineImageOutput {
220 candidate_index,
221 part_index,
222 b64_json: inline_data.data.clone(),
223 output_format,
224 });
225 }
226 }
227
228 outputs
229}
230
231fn modality_token_count(
232 details: Option<&Vec<gt::GeminiModalityTokenCount>>,
233 modality: gt::GeminiModality,
234) -> u64 {
235 details
236 .into_iter()
237 .flat_map(|details| details.iter())
238 .filter(|detail| detail.modality == modality)
239 .map(|detail| detail.token_count)
240 .sum()
241}
242
243pub(crate) fn openai_image_usage_from_gemini(
244 usage: Option<&gct::GeminiUsageMetadata>,
245) -> Option<it::OpenAiImageUsage> {
246 let usage = usage?;
247
248 let input_details = it::OpenAiImageTokenDetails {
249 image_tokens: modality_token_count(
250 usage.prompt_tokens_details.as_ref(),
251 gt::GeminiModality::Image,
252 ),
253 text_tokens: modality_token_count(
254 usage.prompt_tokens_details.as_ref(),
255 gt::GeminiModality::Text,
256 ),
257 };
258 let output_details = it::OpenAiImageTokenDetails {
259 image_tokens: modality_token_count(
260 usage.candidates_tokens_details.as_ref(),
261 gt::GeminiModality::Image,
262 ),
263 text_tokens: modality_token_count(
264 usage.candidates_tokens_details.as_ref(),
265 gt::GeminiModality::Text,
266 ),
267 };
268
269 let input_tokens = usage
270 .prompt_token_count
271 .unwrap_or(input_details.image_tokens + input_details.text_tokens);
272 let output_tokens = usage
273 .candidates_token_count
274 .unwrap_or(output_details.image_tokens + output_details.text_tokens);
275
276 Some(it::OpenAiImageUsage {
277 input_tokens,
278 input_tokens_details: input_details,
279 output_tokens,
280 total_tokens: usage
281 .total_token_count
282 .unwrap_or(input_tokens.saturating_add(output_tokens)),
283 output_tokens_details: usage
284 .candidates_tokens_details
285 .as_ref()
286 .map(|_| output_details),
287 })
288}
289
290pub fn best_effort_openai_image_usage_from_gemini(
291 usage: Option<&gct::GeminiUsageMetadata>,
292) -> it::OpenAiImageUsage {
293 openai_image_usage_from_gemini(usage).unwrap_or(it::OpenAiImageUsage {
294 input_tokens: 0,
295 input_tokens_details: it::OpenAiImageTokenDetails {
296 image_tokens: 0,
297 text_tokens: 0,
298 },
299 output_tokens: 0,
300 total_tokens: 0,
301 output_tokens_details: None,
302 })
303}
304
305pub(crate) fn create_image_response_body_from_gemini_response(
306 body: GeminiGenerateContentResponseBody,
307) -> Result<it::OpenAiCreateImageResponseBody, TransformError> {
308 let outputs = gemini_generated_image_outputs_from_response(&body);
309 if outputs.is_empty() {
310 return Err(TransformError::not_implemented(
311 "cannot convert Gemini generateContent response without image output to OpenAI create image response",
312 ));
313 }
314
315 let mut data = Vec::with_capacity(outputs.len());
316 let mut common_output_format: Option<Option<it::OpenAiImageOutputFormat>> = None;
317 let mut same_output_format = true;
318
319 for output in outputs {
320 if let Some(existing) = common_output_format.as_ref() {
321 if *existing != output.output_format {
322 same_output_format = false;
323 }
324 } else {
325 common_output_format = Some(output.output_format.clone());
326 }
327 data.push(output.image);
328 }
329
330 Ok(it::OpenAiCreateImageResponseBody {
331 created: 0,
332 background: None,
333 data: Some(data),
334 output_format: if same_output_format {
335 common_output_format.flatten()
336 } else {
337 None
338 },
339 quality: None,
340 size: None,
341 usage: openai_image_usage_from_gemini(body.usage_metadata.as_ref()),
342 })
343}