// gproxy-protocol 1.0.20
//
// Wire-format types and cross-protocol transforms for Claude, OpenAI, and Gemini LLM APIs.
// Documentation
use crate::gemini::count_tokens::types as gt;
use crate::gemini::generate_content::response::ResponseBody as GeminiGenerateContentResponseBody;
use crate::gemini::generate_content::types as gct;
use crate::gemini::types::GeminiResponseHeaders;
use crate::openai::create_image::types as it;
use crate::openai::create_image_edit::types as iet;
use crate::openai::types::OpenAiResponseHeaders;
use crate::transform::utils::TransformError;

/// One image pulled out of a Gemini `generateContent` response, expressed with OpenAI
/// image types.
///
/// Values of this type are produced by `gemini_generated_image_outputs_from_response`,
/// which emits one entry per qualifying inline-data part and one per qualifying
/// file-data part.
#[derive(Debug, Clone, PartialEq)]
pub struct GeminiGeneratedImageOutput {
    /// The image itself: `b64_json` is set for inline data, `url` for file data.
    pub image: it::OpenAiGeneratedImage,
    /// OpenAI output format derived from the part's MIME type; `None` when the MIME type
    /// is missing or is not one of the types `openai_output_format_from_mime` recognises.
    pub output_format: Option<it::OpenAiImageOutputFormat>,
}

/// An inline image found in a Gemini `generateContent` response, together with the
/// location in the response where it was found.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GeminiInlineImageOutput {
    /// The candidate's own `index` when it reports one, otherwise the candidate's position
    /// in the response's candidate list.
    pub candidate_index: u32,
    /// Position of the part within the candidate's `content.parts`.
    pub part_index: usize,
    /// Image data copied verbatim from the part's inline data (presumably base64, going by
    /// the field name; it is not decoded or validated when this value is built).
    pub b64_json: String,
    /// OpenAI output format mapped from the part's MIME type.
    pub output_format: it::OpenAiImageOutputFormat,
}

/// Builds a Gemini image config for `aspect_ratio`, always paired with the `"1K"` image size.
fn gemini_image_config(aspect_ratio: &str) -> gt::GeminiImageConfig {
    let aspect_ratio = Some(String::from(aspect_ratio));
    let image_size = Some(String::from("1K"));

    gt::GeminiImageConfig {
        aspect_ratio,
        image_size,
    }
}

/// Maps an OpenAI create-image `size` to the equivalent Gemini image config.
///
/// Returns `Ok(None)` when no size is given or the size is `Auto`, so no image config is
/// produced for the request.
///
/// # Errors
/// Returns a `not_implemented` transform error for the sizes that have no aspect-ratio
/// mapping here (256x256, 512x512, 1792x1024, 1024x1792).
pub(crate) fn gemini_image_config_from_create_image_size(
    size: Option<it::OpenAiImageSize>,
) -> Result<Option<gt::GeminiImageConfig>, TransformError> {
    // Resolve the aspect ratio first; the config itself is built in exactly one place.
    let aspect_ratio = match size {
        None | Some(it::OpenAiImageSize::Auto) => return Ok(None),
        Some(it::OpenAiImageSize::S1024x1024) => "1:1",
        Some(it::OpenAiImageSize::S1536x1024) => "3:2",
        Some(it::OpenAiImageSize::S1024x1536) => "2:3",
        Some(
            it::OpenAiImageSize::S256x256
            | it::OpenAiImageSize::S512x512
            | it::OpenAiImageSize::S1792x1024
            | it::OpenAiImageSize::S1024x1792,
        ) => {
            return Err(TransformError::not_implemented(
                "cannot convert OpenAI image request with unsupported size to Gemini generateContent request",
            ));
        }
    };

    Ok(Some(gemini_image_config(aspect_ratio)))
}

/// Maps an OpenAI image-edit `size` to the equivalent Gemini image config.
///
/// Returns `None` when no size is given or the size is `Auto`; every other edit size has
/// an aspect-ratio mapping, so this conversion cannot fail.
pub(crate) fn gemini_image_config_from_create_image_edit_size(
    size: Option<iet::OpenAiImageEditSize>,
) -> Option<gt::GeminiImageConfig> {
    let aspect_ratio = match size? {
        iet::OpenAiImageEditSize::Auto => return None,
        iet::OpenAiImageEditSize::S1024x1024 => "1:1",
        iet::OpenAiImageEditSize::S1536x1024 => "3:2",
        iet::OpenAiImageEditSize::S1024x1536 => "2:3",
    };

    Some(gemini_image_config(aspect_ratio))
}

/// Splits a `data:<mime>;base64,<payload>` URL into a Gemini inline blob.
///
/// Everything between `data:` and the trailing `;base64` marker becomes the blob's MIME
/// type, and everything after the first comma becomes its data. The payload is passed
/// through unchanged; it is neither decoded nor validated here.
///
/// # Errors
/// Returns a `not_implemented` transform error when `value` does not start with `data:`,
/// contains no comma, has metadata that does not end in `;base64`, or has an empty MIME
/// type or an empty payload.
fn parse_base64_data_url(value: &str) -> Result<gt::GeminiBlob, TransformError> {
    const INVALID_DATA_URL: &str = "cannot convert OpenAI image edit request with invalid data URL input image to Gemini generateContent request";

    // Built on demand so the success path never constructs (and then drops) an error.
    let invalid = || TransformError::not_implemented(INVALID_DATA_URL);

    let payload = value.strip_prefix("data:").ok_or_else(invalid)?;
    let (metadata, data) = payload.split_once(',').ok_or_else(invalid)?;
    let mime_type = metadata.strip_suffix(";base64").ok_or_else(invalid)?;

    if mime_type.is_empty() || data.is_empty() {
        return Err(invalid());
    }

    Ok(gt::GeminiBlob {
        mime_type: mime_type.to_string(),
        data: data.to_string(),
    })
}

/// Converts one OpenAI image-edit input image into a Gemini content part.
///
/// A `data:` URL becomes an inline-data part (see `parse_base64_data_url`); any other
/// non-empty URL becomes a file-data part with that URL as the file URI and no MIME type.
///
/// # Errors
/// Returns a `not_implemented` transform error when the image carries a `file_id`, when
/// `image_url` is missing or empty, or when a `data:` URL cannot be parsed.
pub(crate) fn gemini_part_from_openai_edit_input_image(
    image: iet::OpenAiImageEditInputImage,
) -> Result<gt::GeminiPart, TransformError> {
    if image.file_id.is_some() {
        return Err(TransformError::not_implemented(
            "cannot convert OpenAI image edit request with file_id input image to Gemini generateContent request",
        ));
    }

    // A missing URL and an empty URL are rejected with the same error, so both are handled
    // in one arm; the error is only built when it is actually returned.
    let image_url = match image.image_url {
        Some(url) if !url.is_empty() => url,
        _ => {
            return Err(TransformError::not_implemented(
                "cannot convert OpenAI image edit request without image_url input image to Gemini generateContent request",
            ));
        }
    };

    let part = if image_url.starts_with("data:") {
        gt::GeminiPart {
            inline_data: Some(parse_base64_data_url(&image_url)?),
            ..gt::GeminiPart::default()
        }
    } else {
        gt::GeminiPart {
            file_data: Some(gt::GeminiFileData {
                mime_type: None,
                file_uri: image_url,
            }),
            ..gt::GeminiPart::default()
        }
    };

    Ok(part)
}

pub(crate) fn openai_response_headers_from_gemini(
    headers: GeminiResponseHeaders,
) -> OpenAiResponseHeaders {
    OpenAiResponseHeaders {
        extra: headers.extra,
    }
}

/// Maps an image MIME type to the matching OpenAI output format.
///
/// Matching ignores ASCII case. `image/png`, `image/jpeg` (or `image/jpg`) and
/// `image/webp` are recognised; anything else yields `None`.
pub(crate) fn openai_output_format_from_mime(
    mime_type: &str,
) -> Option<it::OpenAiImageOutputFormat> {
    // Compare in place rather than allocating a lowercased copy on every call.
    let is = |candidate: &str| mime_type.eq_ignore_ascii_case(candidate);

    if is("image/png") {
        Some(it::OpenAiImageOutputFormat::Png)
    } else if is("image/jpeg") || is("image/jpg") {
        Some(it::OpenAiImageOutputFormat::Jpeg)
    } else if is("image/webp") {
        Some(it::OpenAiImageOutputFormat::Webp)
    } else {
        None
    }
}

/// Collects every image carried by a Gemini `generateContent` response.
///
/// Parts are visited in candidate order, then part order. For each part, a non-empty
/// inline payload whose MIME type starts with `image/` yields a `b64_json` image, and a
/// file reference with a non-empty URI and either no MIME type or an `image/` MIME type
/// yields a `url` image. A part that carries both yields two entries, inline first.
/// Candidates without content are skipped; a response without candidates yields an empty
/// vector.
pub(crate) fn gemini_generated_image_outputs_from_response(
    body: &GeminiGenerateContentResponseBody,
) -> Vec<GeminiGeneratedImageOutput> {
    let parts = body
        .candidates
        .as_ref()
        .into_iter()
        .flatten()
        .filter_map(|candidate| candidate.content.as_ref())
        .flat_map(|content| content.parts.iter());

    let mut images = Vec::new();

    for part in parts {
        let inline_image = part
            .inline_data
            .as_ref()
            .filter(|blob| blob.mime_type.starts_with("image/") && !blob.data.is_empty());
        if let Some(blob) = inline_image {
            images.push(GeminiGeneratedImageOutput {
                image: it::OpenAiGeneratedImage {
                    b64_json: Some(blob.data.clone()),
                    revised_prompt: None,
                    url: None,
                },
                output_format: openai_output_format_from_mime(&blob.mime_type),
            });
        }

        // A file reference with no MIME type is still accepted as an image.
        let linked_image = part.file_data.as_ref().filter(|file| {
            !file.file_uri.is_empty()
                && file
                    .mime_type
                    .as_deref()
                    .is_none_or(|mime_type| mime_type.starts_with("image/"))
        });
        if let Some(file) = linked_image {
            let output_format = file
                .mime_type
                .as_deref()
                .and_then(openai_output_format_from_mime);
            images.push(GeminiGeneratedImageOutput {
                image: it::OpenAiGeneratedImage {
                    b64_json: None,
                    revised_prompt: None,
                    url: Some(file.file_uri.clone()),
                },
                output_format,
            });
        }
    }

    images
}

pub fn gemini_inline_image_outputs_from_response(
    body: &GeminiGenerateContentResponseBody,
) -> Vec<GeminiInlineImageOutput> {
    let mut outputs = Vec::new();

    let Some(candidates) = body.candidates.as_ref() else {
        return outputs;
    };

    for (candidate_pos, candidate) in candidates.iter().enumerate() {
        let Some(content) = candidate.content.as_ref() else {
            continue;
        };
        let candidate_index = candidate.index.unwrap_or(candidate_pos as u32);

        for (part_index, part) in content.parts.iter().enumerate() {
            let Some(inline_data) = part.inline_data.as_ref() else {
                continue;
            };
            if !inline_data.mime_type.starts_with("image/") || inline_data.data.is_empty() {
                continue;
            }
            let Some(output_format) = openai_output_format_from_mime(&inline_data.mime_type) else {
                continue;
            };
            outputs.push(GeminiInlineImageOutput {
                candidate_index,
                part_index,
                b64_json: inline_data.data.clone(),
                output_format,
            });
        }
    }

    outputs
}

/// Sums the token counts of every entry in `details` whose modality equals `modality`.
///
/// Returns 0 when `details` is `None` or contains no matching entry.
fn modality_token_count(
    details: Option<&Vec<gt::GeminiModalityTokenCount>>,
    modality: gt::GeminiModality,
) -> u64 {
    let Some(entries) = details else {
        return 0;
    };

    entries
        .iter()
        .filter_map(|entry| (entry.modality == modality).then_some(entry.token_count))
        .sum()
}

/// Converts Gemini usage metadata into OpenAI image usage.
///
/// Returns `None` when `usage` is `None`. Per-modality image and text counts come from the
/// prompt and candidates token details. The top-level input, output and total counts
/// prefer the values Gemini reported and fall back to sums of the derived counts.
/// `output_tokens_details` is populated only when Gemini supplied candidates token details.
pub(crate) fn openai_image_usage_from_gemini(
    usage: Option<&gct::GeminiUsageMetadata>,
) -> Option<it::OpenAiImageUsage> {
    let usage = usage?;

    let prompt_details = usage.prompt_tokens_details.as_ref();
    let candidates_details = usage.candidates_tokens_details.as_ref();

    let input_details = it::OpenAiImageTokenDetails {
        image_tokens: modality_token_count(prompt_details, gt::GeminiModality::Image),
        text_tokens: modality_token_count(prompt_details, gt::GeminiModality::Text),
    };
    let output_details = it::OpenAiImageTokenDetails {
        image_tokens: modality_token_count(candidates_details, gt::GeminiModality::Image),
        text_tokens: modality_token_count(candidates_details, gt::GeminiModality::Text),
    };

    // The fallbacks are computed only when Gemini omitted the count, and they saturate
    // like the total below, so an oversized detail sum cannot trigger an overflow panic.
    let input_tokens = usage.prompt_token_count.unwrap_or_else(|| {
        input_details
            .image_tokens
            .saturating_add(input_details.text_tokens)
    });
    let output_tokens = usage.candidates_token_count.unwrap_or_else(|| {
        output_details
            .image_tokens
            .saturating_add(output_details.text_tokens)
    });
    let total_tokens = usage
        .total_token_count
        .unwrap_or_else(|| input_tokens.saturating_add(output_tokens));

    Some(it::OpenAiImageUsage {
        input_tokens,
        input_tokens_details: input_details,
        output_tokens,
        total_tokens,
        output_tokens_details: candidates_details.map(|_| output_details),
    })
}

/// Like `openai_image_usage_from_gemini`, but never returns `None`: when the conversion
/// yields nothing, an all-zero usage without output token details is returned instead.
pub fn best_effort_openai_image_usage_from_gemini(
    usage: Option<&gct::GeminiUsageMetadata>,
) -> it::OpenAiImageUsage {
    if let Some(converted) = openai_image_usage_from_gemini(usage) {
        return converted;
    }

    it::OpenAiImageUsage {
        input_tokens: 0,
        input_tokens_details: it::OpenAiImageTokenDetails {
            image_tokens: 0,
            text_tokens: 0,
        },
        output_tokens: 0,
        total_tokens: 0,
        output_tokens_details: None,
    }
}

/// Converts a Gemini `generateContent` response into an OpenAI create-image response body.
///
/// `data` holds every image found in the response, in order. `output_format` is set only
/// when all images share one known format. `usage` is derived from the response's usage
/// metadata. `background`, `quality` and `size` are left unset.
///
/// # Errors
/// Returns a `not_implemented` transform error when the response contains no image output.
pub(crate) fn create_image_response_body_from_gemini_response(
    body: GeminiGenerateContentResponseBody,
) -> Result<it::OpenAiCreateImageResponseBody, TransformError> {
    let outputs = gemini_generated_image_outputs_from_response(&body);

    let Some(first) = outputs.first() else {
        return Err(TransformError::not_implemented(
            "cannot convert Gemini generateContent response without image output to OpenAI create image response",
        ));
    };

    // Take the first image's format and keep it only if every image agrees with it.
    let output_format = first.output_format.clone().filter(|format| {
        outputs
            .iter()
            .all(|output| output.output_format.as_ref() == Some(format))
    });

    let usage = openai_image_usage_from_gemini(body.usage_metadata.as_ref());
    let data: Vec<_> = outputs.into_iter().map(|output| output.image).collect();

    Ok(it::OpenAiCreateImageResponseBody {
        // NOTE(review): no timestamp is available here, so this stays 0; presumably the
        // caller overwrites it. Confirm.
        created: 0,
        background: None,
        data: Some(data),
        output_format,
        quality: None,
        size: None,
        usage,
    })
}