Skip to main content

litellm_rust/providers/
gemini.rs

1use crate::config::ProviderConfig;
2use crate::error::{LiteLLMError, Result};
3use crate::http::send_json;
4use crate::providers::resolve_api_key;
5use crate::types::{
6    ChatContentPart, ChatContentPartFile, ChatContentPartImageUrl, ChatContentPartInputAudio,
7    ChatContentPartText, ChatFile, ChatImageUrl, ChatInputAudio, ChatMessage, ChatMessageContent,
8    ChatRequest, ChatResponse, ImageData, ImageEditRequest, ImageInputData, ImageRequest,
9    ImageResponse, Usage, VideoRequest, VideoResponse,
10};
11use base64::{engine::general_purpose, Engine as _};
12use mime_guess::MimeGuess;
13use reqwest::header::CONTENT_TYPE;
14use reqwest::Client;
15use serde_json::Value;
16use std::env;
17use tokio::time::{sleep, Duration};
18
/// Default pause, in seconds, between two consecutive video generation status checks.
pub const DEFAULT_VIDEO_POLL_INTERVAL_SECS: u64 = 5;
/// Default cap on video generation status checks
/// (240 checks spaced 5 seconds apart is about 20 minutes).
pub const DEFAULT_VIDEO_MAX_POLL_ATTEMPTS: u32 = 240;
23
24pub async fn chat(client: &Client, cfg: &ProviderConfig, req: ChatRequest) -> Result<ChatResponse> {
25    let base = cfg
26        .base_url
27        .clone()
28        .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
29    let key = resolve_api_key(cfg)?
30        .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
31    let model = req.model.trim_start_matches("models/");
32    let url = format!(
33        "{}/models/{}:generateContent",
34        base.trim_end_matches('/'),
35        model
36    );
37
38    let mut messages = req.messages;
39    let system_instruction = extract_system_instruction(&mut messages)?;
40    if messages.is_empty() && !system_instruction.is_empty() {
41        messages.push(ChatMessage {
42            role: "user".to_string(),
43            content: ChatMessageContent::Text(" ".to_string()),
44            name: None,
45            tool_call_id: None,
46            tool_calls: None,
47            function_call: None,
48            provider_specific_fields: None,
49        });
50    }
51
52    let contents = gemini_contents_from_messages(client, messages, model).await?;
53
54    let mut body = serde_json::json!({ "contents": contents });
55    if !system_instruction.is_empty() {
56        body["system_instruction"] = serde_json::json!({ "parts": system_instruction });
57    }
58
59    // Build generationConfig safely without unwrap
60    let mut generation_config = serde_json::Map::new();
61    if let Some(temp) = req.temperature {
62        generation_config.insert("temperature".to_string(), serde_json::json!(temp));
63    }
64    if let Some(max_tokens) = req.max_tokens {
65        generation_config.insert("maxOutputTokens".to_string(), serde_json::json!(max_tokens));
66    }
67    if !generation_config.is_empty() {
68        body["generationConfig"] = Value::Object(generation_config);
69    }
70
71    let mut builder = client.post(url).header("x-goog-api-key", key).json(&body);
72    for (k, v) in &cfg.extra_headers {
73        builder = builder.header(k, v);
74    }
75
76    let (resp, _headers) = send_json::<Value>(builder).await?;
77    let content = extract_text(&resp);
78    let usage = parse_usage(&resp);
79    let debug = env::var("LITELLM_GEMINI_DEBUG").ok().as_deref() == Some("1")
80        || env::var("YOETZ_GEMINI_DEBUG").ok().as_deref() == Some("1");
81    if debug {
82        if let Ok(pretty) = serde_json::to_string_pretty(&resp) {
83            eprintln!("litellm-rust gemini raw response:\n{pretty}");
84        }
85    }
86
87    Ok(ChatResponse {
88        content,
89        usage,
90        response_id: None,
91        header_cost: None,
92        raw: if debug { Some(resp) } else { None },
93    })
94}
95
96/// Video generation options for configurable timeouts.
97#[derive(Debug, Clone)]
98pub struct VideoGenerationOptions {
99    /// Maximum number of polling attempts
100    pub max_poll_attempts: u32,
101    /// Interval between polling attempts in seconds
102    pub poll_interval_secs: u64,
103}
104
105impl Default for VideoGenerationOptions {
106    fn default() -> Self {
107        Self {
108            max_poll_attempts: DEFAULT_VIDEO_MAX_POLL_ATTEMPTS,
109            poll_interval_secs: DEFAULT_VIDEO_POLL_INTERVAL_SECS,
110        }
111    }
112}
113
114pub async fn video_generation(
115    client: &Client,
116    cfg: &ProviderConfig,
117    req: VideoRequest,
118) -> Result<VideoResponse> {
119    video_generation_with_options(client, cfg, req, VideoGenerationOptions::default()).await
120}
121
122pub async fn video_generation_with_options(
123    client: &Client,
124    cfg: &ProviderConfig,
125    req: VideoRequest,
126    options: VideoGenerationOptions,
127) -> Result<VideoResponse> {
128    let base = cfg
129        .base_url
130        .clone()
131        .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
132    let key = resolve_api_key(cfg)?
133        .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
134    let model = req.model.trim_start_matches("models/");
135
136    let url = format!(
137        "{}/models/{}:predictLongRunning",
138        base.trim_end_matches('/'),
139        model
140    );
141
142    let mut parameters = serde_json::Map::new();
143    if let Some(seconds) = req.seconds {
144        parameters.insert("durationSeconds".to_string(), serde_json::json!(seconds));
145    }
146    if let Some(size) = req.size {
147        parameters.insert("resolution".to_string(), serde_json::json!(size));
148    }
149
150    let body = serde_json::json!({
151        "instances": [{ "prompt": req.prompt }],
152        "parameters": Value::Object(parameters),
153    });
154
155    let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
156    for (k, v) in &cfg.extra_headers {
157        builder = builder.header(k, v);
158    }
159
160    let (resp, _headers) = send_json::<Value>(builder).await?;
161    let op_name = resp
162        .get("name")
163        .and_then(|v| v.as_str())
164        .ok_or_else(|| LiteLLMError::Parse("missing operation name".into()))?;
165
166    let op_url = if op_name.starts_with("http") {
167        op_name.to_string()
168    } else {
169        format!("{}/{}", base.trim_end_matches('/'), op_name)
170    };
171
172    let poll_interval = Duration::from_secs(options.poll_interval_secs);
173
174    for attempt in 0..options.max_poll_attempts {
175        let mut poll = client.get(&op_url).header("x-goog-api-key", &key);
176        for (k, v) in &cfg.extra_headers {
177            poll = poll.header(k, v);
178        }
179        let (op_resp, _headers) = send_json::<Value>(poll).await?;
180
181        if op_resp.get("done").and_then(|v| v.as_bool()) == Some(true) {
182            if op_resp.get("error").is_some() {
183                return Err(LiteLLMError::http("video generation failed"));
184            }
185
186            let uri = extract_video_uri(&op_resp)
187                .ok_or_else(|| LiteLLMError::Parse("missing video uri".into()))?;
188
189            return Ok(VideoResponse {
190                video_url: Some(uri),
191                raw: None,
192            });
193        }
194
195        if attempt + 1 >= options.max_poll_attempts {
196            return Err(LiteLLMError::http(format!(
197                "video generation timed out after {} attempts",
198                options.max_poll_attempts
199            )));
200        }
201        sleep(poll_interval).await;
202    }
203
204    Err(LiteLLMError::http("video generation timed out"))
205}
206
207pub async fn image_generation(
208    client: &Client,
209    cfg: &ProviderConfig,
210    req: ImageRequest,
211) -> Result<ImageResponse> {
212    let base = cfg
213        .base_url
214        .clone()
215        .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
216    let key = resolve_api_key(cfg)?
217        .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
218    let model = req.model.trim_start_matches("models/");
219    let n = req.n.unwrap_or(1);
220
221    if model.contains("gemini") {
222        // Path A: Gemini native image generation via generateContent
223        let url = format!(
224            "{}/models/{}:generateContent",
225            base.trim_end_matches('/'),
226            model
227        );
228
229        let body = serde_json::json!({
230            "contents": [{ "parts": [{ "text": req.prompt }] }],
231            "generationConfig": { "response_modalities": ["IMAGE", "TEXT"] }
232        });
233
234        let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
235        for (k, v) in &cfg.extra_headers {
236            builder = builder.header(k, v);
237        }
238
239        let (resp, _headers) = send_json::<Value>(builder).await?;
240        let usage = parse_usage(&resp);
241        let images = extract_images_from_candidates(&resp);
242
243        Ok(ImageResponse {
244            images,
245            usage,
246            raw: Some(resp),
247        })
248    } else {
249        // Path B: Imagen models via predict endpoint
250        let url = format!("{}/models/{}:predict", base.trim_end_matches('/'), model);
251
252        let body = serde_json::json!({
253            "instances": [{ "prompt": req.prompt }],
254            "parameters": { "sampleCount": n }
255        });
256
257        let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
258        for (k, v) in &cfg.extra_headers {
259            builder = builder.header(k, v);
260        }
261
262        let (resp, _headers) = send_json::<Value>(builder).await?;
263        let images = extract_images_from_predictions(&resp);
264
265        Ok(ImageResponse {
266            images,
267            usage: Usage::default(),
268            raw: Some(resp),
269        })
270    }
271}
272
273pub async fn image_editing(
274    client: &Client,
275    cfg: &ProviderConfig,
276    req: ImageEditRequest,
277) -> Result<ImageResponse> {
278    let base = cfg
279        .base_url
280        .clone()
281        .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
282    let key = resolve_api_key(cfg)?
283        .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
284    let model = req.model.trim_start_matches("models/");
285
286    let url = format!(
287        "{}/models/{}:generateContent",
288        base.trim_end_matches('/'),
289        model
290    );
291
292    let mut parts = Vec::with_capacity(req.images.len() + 1);
293    parts.push(serde_json::json!({ "text": req.prompt }));
294    for image in &req.images {
295        parts.push(image_input_to_part(client, image).await?);
296    }
297
298    let body = serde_json::json!({
299        "contents": [{ "role": "user", "parts": parts }],
300        "generationConfig": {
301            "response_modalities": ["IMAGE", "TEXT"]
302        }
303    });
304
305    let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
306    for (k, v) in &cfg.extra_headers {
307        builder = builder.header(k, v);
308    }
309
310    let (resp, _headers) = send_json::<Value>(builder).await?;
311    let usage = parse_usage(&resp);
312    let images = extract_images_from_candidates(&resp);
313
314    Ok(ImageResponse {
315        images,
316        usage,
317        raw: Some(resp),
318    })
319}
320
321async fn image_input_to_part(_client: &Client, input: &ImageInputData) -> Result<Value> {
322    if let Some(b64) = &input.b64_json {
323        let mime = input
324            .mime_type
325            .as_deref()
326            .unwrap_or("image/png")
327            .to_string();
328        return Ok(serde_json::json!({
329            "inline_data": { "mime_type": mime, "data": b64 }
330        }));
331    }
332    if let Some(url) = &input.url {
333        return process_gemini_media_url(url, input.mime_type.as_deref(), None, None, "");
334    }
335    Err(LiteLLMError::Config(
336        "ImageInputData requires b64_json or url".into(),
337    ))
338}
339
340fn extract_images_from_candidates(resp: &Value) -> Vec<ImageData> {
341    let mut images = Vec::new();
342    if let Some(candidates) = resp.get("candidates").and_then(|v| v.as_array()) {
343        for candidate in candidates {
344            if let Some(parts) = candidate
345                .get("content")
346                .and_then(|v| v.get("parts"))
347                .and_then(|v| v.as_array())
348            {
349                for part in parts {
350                    if let Some(data) = part
351                        .get("inlineData")
352                        .and_then(|v| v.get("data"))
353                        .and_then(|v| v.as_str())
354                    {
355                        let mime = part
356                            .get("inlineData")
357                            .and_then(|v| v.get("mimeType"))
358                            .and_then(|v| v.as_str())
359                            .map(|s| s.to_string());
360                        images.push(ImageData {
361                            b64_json: Some(data.to_string()),
362                            url: None,
363                            revised_prompt: None,
364                            mime_type: mime,
365                        });
366                    }
367                }
368            }
369        }
370    }
371    images
372}
373
374fn extract_images_from_predictions(resp: &Value) -> Vec<ImageData> {
375    let mut images = Vec::new();
376    if let Some(predictions) = resp.get("predictions").and_then(|v| v.as_array()) {
377        for prediction in predictions {
378            if let Some(data) = prediction
379                .get("bytesBase64Encoded")
380                .and_then(|v| v.as_str())
381            {
382                let mime = prediction
383                    .get("mimeType")
384                    .and_then(|v| v.as_str())
385                    .map(|s| s.to_string());
386                images.push(ImageData {
387                    b64_json: Some(data.to_string()),
388                    url: None,
389                    revised_prompt: None,
390                    mime_type: mime,
391                });
392            }
393        }
394    }
395    images
396}
397
398fn extract_text(resp: &Value) -> String {
399    if let Some(candidates) = resp.get("candidates").and_then(|v| v.as_array()) {
400        if let Some(first) = candidates.first() {
401            if let Some(parts) = first
402                .get("content")
403                .and_then(|v| v.get("parts"))
404                .and_then(|v| v.as_array())
405            {
406                let mut text = String::new();
407                for part in parts {
408                    if let Some(piece) = part.get("text").and_then(|v| v.as_str()) {
409                        text.push_str(piece);
410                    }
411                }
412                return text;
413            }
414        }
415    }
416    String::new()
417}
418
419fn parse_usage(resp: &Value) -> Usage {
420    if let Some(meta) = resp.get("usageMetadata").and_then(|v| v.as_object()) {
421        return Usage {
422            prompt_tokens: meta.get("promptTokenCount").and_then(|v| v.as_u64()),
423            completion_tokens: meta.get("candidatesTokenCount").and_then(|v| v.as_u64()),
424            thoughts_tokens: meta.get("thoughtsTokenCount").and_then(|v| v.as_u64()),
425            total_tokens: meta.get("totalTokenCount").and_then(|v| v.as_u64()),
426            cost_usd: None,
427        };
428    }
429    Usage::default()
430}
431
432fn extract_system_instruction(messages: &mut Vec<ChatMessage>) -> Result<Vec<Value>> {
433    let mut parts = Vec::new();
434    let mut indices = Vec::new();
435    for (idx, msg) in messages.iter().enumerate() {
436        if msg.role == "system" {
437            match &msg.content {
438                ChatMessageContent::Text(text) => {
439                    if !text.is_empty() {
440                        parts.push(serde_json::json!({ "text": text }));
441                    }
442                }
443                ChatMessageContent::Parts(content_parts) => {
444                    let mut system_text = String::new();
445                    for part in content_parts {
446                        if let ChatContentPart::Text(ChatContentPartText { text, .. }) = part {
447                            system_text.push_str(text);
448                        }
449                    }
450                    if !system_text.is_empty() {
451                        parts.push(serde_json::json!({ "text": system_text }));
452                    }
453                }
454            }
455            indices.push(idx);
456        }
457    }
458    for idx in indices.into_iter().rev() {
459        messages.remove(idx);
460    }
461    Ok(parts)
462}
463
464async fn gemini_contents_from_messages(
465    client: &Client,
466    messages: Vec<ChatMessage>,
467    model: &str,
468) -> Result<Vec<Value>> {
469    let mut contents: Vec<Value> = Vec::new();
470    let mut msg_i = 0;
471    let mut tool_call_responses: Vec<Value> = Vec::new();
472    let mut last_tool_calls: Vec<ToolCallInfo> = Vec::new();
473
474    while msg_i < messages.len() {
475        let role = messages[msg_i].role.as_str();
476        if role == "user" {
477            let mut parts: Vec<Value> = Vec::new();
478            while msg_i < messages.len() && messages[msg_i].role == "user" {
479                parts.extend(
480                    gemini_parts_from_content(client, &messages[msg_i].content, model).await?,
481                );
482                msg_i += 1;
483            }
484            if !parts.is_empty() {
485                contents.push(serde_json::json!({ "role": "user", "parts": parts }));
486            }
487            continue;
488        }
489
490        if role == "assistant" {
491            let mut parts: Vec<Value> = Vec::new();
492            while msg_i < messages.len() && messages[msg_i].role == "assistant" {
493                let message = &messages[msg_i];
494                parts.extend(gemini_parts_from_content(client, &message.content, model).await?);
495                let (tool_parts, tool_infos) = tool_call_parts_from_message(message)?;
496                if !tool_parts.is_empty() {
497                    parts.extend(tool_parts);
498                    if !tool_infos.is_empty() {
499                        last_tool_calls = tool_infos;
500                    }
501                }
502                msg_i += 1;
503            }
504            if !parts.is_empty() {
505                contents.push(serde_json::json!({ "role": "model", "parts": parts }));
506            }
507            continue;
508        }
509
510        if role == "tool" || role == "function" {
511            while msg_i < messages.len()
512                && (messages[msg_i].role == "tool" || messages[msg_i].role == "function")
513            {
514                let response_parts =
515                    tool_response_parts(client, &messages[msg_i], &last_tool_calls).await?;
516                tool_call_responses.extend(response_parts);
517                msg_i += 1;
518            }
519            if !tool_call_responses.is_empty() {
520                contents.push(serde_json::json!({ "parts": tool_call_responses }));
521                tool_call_responses = Vec::new();
522            }
523            continue;
524        }
525
526        let parts = gemini_parts_from_content(client, &messages[msg_i].content, model).await?;
527        if !parts.is_empty() {
528            contents.push(serde_json::json!({ "role": "user", "parts": parts }));
529        }
530        msg_i += 1;
531    }
532
533    if !tool_call_responses.is_empty() {
534        contents.push(serde_json::json!({ "parts": tool_call_responses }));
535    }
536    if contents.is_empty() {
537        contents.push(serde_json::json!({
538            "role": "user",
539            "parts": [{ "text": " " }]
540        }));
541    }
542    Ok(contents)
543}
544
545async fn gemini_parts_from_content(
546    _client: &Client,
547    content: &ChatMessageContent,
548    model: &str,
549) -> Result<Vec<Value>> {
550    match content {
551        ChatMessageContent::Text(text) => Ok(vec![serde_json::json!({ "text": text })]),
552        ChatMessageContent::Parts(parts) => {
553            let mut out = Vec::new();
554            for part in parts {
555                match part {
556                    ChatContentPart::Text(ChatContentPartText { text, .. }) => {
557                        if !text.is_empty() {
558                            out.push(serde_json::json!({ "text": text }));
559                        }
560                    }
561                    ChatContentPart::ImageUrl(ChatContentPartImageUrl { image_url, .. }) => {
562                        let detail = match image_url {
563                            ChatImageUrl::Object(obj) => obj.detail.as_deref(),
564                            ChatImageUrl::Url(_) => None,
565                        };
566                        out.push(process_gemini_media(image_url, detail, None, None, model)?);
567                    }
568                    ChatContentPart::InputAudio(ChatContentPartInputAudio {
569                        input_audio, ..
570                    }) => {
571                        out.push(process_gemini_audio(input_audio)?);
572                    }
573                    ChatContentPart::File(ChatContentPartFile { file, .. }) => {
574                        out.push(process_gemini_file(file, model)?);
575                    }
576                    ChatContentPart::Other(value) => {
577                        return Err(LiteLLMError::Config(format!(
578                            "unsupported gemini content part: {}",
579                            value
580                        )));
581                    }
582                }
583            }
584            Ok(out)
585        }
586    }
587}
588
/// Identity of a function call made by the assistant, kept so that a later tool response can
/// be matched back to the function it answers.
#[derive(Clone, Debug)]
struct ToolCallInfo {
    /// Identifier of the call, when the caller supplied one
    id: Option<String>,
    /// Name of the called function
    name: String,
}
594
595fn tool_call_parts_from_message(message: &ChatMessage) -> Result<(Vec<Value>, Vec<ToolCallInfo>)> {
596    let mut parts = Vec::new();
597    let mut infos = Vec::new();
598
599    if let Some(tool_calls) = &message.tool_calls {
600        if let Some(array) = tool_calls.as_array() {
601            for tool in array {
602                let function = tool
603                    .get("function")
604                    .ok_or_else(|| LiteLLMError::Config("tool_call missing function".into()))?;
605                let name = function
606                    .get("name")
607                    .and_then(|v| v.as_str())
608                    .ok_or_else(|| LiteLLMError::Config("tool_call missing name".into()))?;
609                let args_raw = function
610                    .get("arguments")
611                    .and_then(|v| v.as_str())
612                    .unwrap_or("");
613                let args = if args_raw.trim().is_empty() {
614                    Value::Object(serde_json::Map::new())
615                } else {
616                    serde_json::from_str(args_raw)
617                        .map_err(|e| LiteLLMError::Parse(e.to_string()))?
618                };
619                parts.push(serde_json::json!({
620                    "function_call": { "name": name, "args": args }
621                }));
622                let id = tool
623                    .get("id")
624                    .and_then(|v| v.as_str())
625                    .map(|s| s.to_string());
626                infos.push(ToolCallInfo {
627                    id,
628                    name: name.to_string(),
629                });
630            }
631            return Ok((parts, infos));
632        }
633    }
634
635    if let Some(function_call) = &message.function_call {
636        let name = function_call
637            .get("name")
638            .and_then(|v| v.as_str())
639            .ok_or_else(|| LiteLLMError::Config("function_call missing name".into()))?;
640        let args_raw = function_call
641            .get("arguments")
642            .and_then(|v| v.as_str())
643            .unwrap_or("");
644        let args = if args_raw.trim().is_empty() {
645            Value::Object(serde_json::Map::new())
646        } else {
647            serde_json::from_str(args_raw).map_err(|e| LiteLLMError::Parse(e.to_string()))?
648        };
649        parts.push(serde_json::json!({
650            "function_call": { "name": name, "args": args }
651        }));
652        infos.push(ToolCallInfo {
653            id: None,
654            name: name.to_string(),
655        });
656    }
657
658    Ok((parts, infos))
659}
660
661async fn tool_response_parts(
662    client: &Client,
663    message: &ChatMessage,
664    last_tool_calls: &[ToolCallInfo],
665) -> Result<Vec<Value>> {
666    let name = resolve_tool_name(message, last_tool_calls)?;
667    let (content_str, mut inline_parts) = extract_tool_content(client, &message.content).await?;
668    let response = parse_tool_response_data(&content_str);
669    let function_part = serde_json::json!({
670        "function_response": { "name": name, "response": response }
671    });
672    let mut parts = vec![function_part];
673    parts.append(&mut inline_parts);
674    Ok(parts)
675}
676
677fn resolve_tool_name(message: &ChatMessage, last_tool_calls: &[ToolCallInfo]) -> Result<String> {
678    if let Some(name) = &message.name {
679        return Ok(name.clone());
680    }
681    if let Some(tool_call_id) = &message.tool_call_id {
682        if let Some(info) = last_tool_calls
683            .iter()
684            .find(|info| info.id.as_deref() == Some(tool_call_id.as_str()))
685        {
686            return Ok(info.name.clone());
687        }
688    }
689    Err(LiteLLMError::Config(
690        "missing tool name for tool response".into(),
691    ))
692}
693
694async fn extract_tool_content(
695    client: &Client,
696    content: &ChatMessageContent,
697) -> Result<(String, Vec<Value>)> {
698    let mut text = String::new();
699    let mut inline_parts = Vec::new();
700    match content {
701        ChatMessageContent::Text(t) => {
702            text.push_str(t);
703        }
704        ChatMessageContent::Parts(parts) => {
705            for part in parts {
706                match part {
707                    ChatContentPart::Text(ChatContentPartText { text: t, .. }) => {
708                        text.push_str(t);
709                    }
710                    ChatContentPart::ImageUrl(ChatContentPartImageUrl { image_url, .. }) => {
711                        inline_parts.push(inline_data_from_image_url(client, image_url).await?);
712                    }
713                    ChatContentPart::InputAudio(ChatContentPartInputAudio {
714                        input_audio, ..
715                    }) => {
716                        inline_parts.push(inline_data_from_audio(input_audio)?);
717                    }
718                    ChatContentPart::File(ChatContentPartFile { file, .. }) => {
719                        inline_parts.push(inline_data_from_file(client, file).await?);
720                    }
721                    ChatContentPart::Other(_) => {}
722                }
723            }
724        }
725    }
726    Ok((text, inline_parts))
727}
728
729fn parse_tool_response_data(content: &str) -> Value {
730    let trimmed = content.trim();
731    if trimmed.starts_with('{') || trimmed.starts_with('[') {
732        if let Ok(value) = serde_json::from_str(trimmed) {
733            return value;
734        }
735    }
736    serde_json::json!({ "content": content })
737}
738
739async fn inline_data_from_image_url(client: &Client, image_url: &ChatImageUrl) -> Result<Value> {
740    let (url, format) = match image_url {
741        ChatImageUrl::Url(url) => (url.as_str(), None),
742        ChatImageUrl::Object(obj) => (obj.url.as_str(), obj.format.as_deref()),
743    };
744    inline_data_from_url(client, url, format).await
745}
746
747fn inline_data_from_audio(input_audio: &ChatInputAudio) -> Result<Value> {
748    let format = if input_audio.format.starts_with("audio/") {
749        input_audio.format.clone()
750    } else {
751        format!("audio/{}", input_audio.format)
752    };
753    Ok(serde_json::json!({
754        "inline_data": { "mime_type": format, "data": input_audio.data }
755    }))
756}
757
758async fn inline_data_from_file(client: &Client, file: &ChatFile) -> Result<Value> {
759    let passed = file
760        .file_id
761        .as_ref()
762        .or(file.file_data.as_ref())
763        .ok_or_else(|| LiteLLMError::Config("file_id or file_data required".into()))?;
764    inline_data_from_url(client, passed, file.format.as_deref()).await
765}
766
767async fn inline_data_from_url(client: &Client, url: &str, format: Option<&str>) -> Result<Value> {
768    if let Some((mime, data)) = parse_data_url(url, format) {
769        return Ok(serde_json::json!({
770            "inline_data": { "mime_type": mime, "data": data }
771        }));
772    }
773    if url.starts_with("http://") || url.starts_with("https://") {
774        let (mime, data) = fetch_bytes_with_mime(client, url, format).await?;
775        return Ok(serde_json::json!({
776            "inline_data": { "mime_type": mime, "data": data }
777        }));
778    }
779    Err(LiteLLMError::Config("unsupported inline data url".into()))
780}
781
782async fn fetch_bytes_with_mime(
783    client: &Client,
784    url: &str,
785    format: Option<&str>,
786) -> Result<(String, String)> {
787    let resp = client.get(url).send().await.map_err(LiteLLMError::from)?;
788    let headers = resp.headers().clone();
789    let bytes = resp.bytes().await.map_err(LiteLLMError::from)?;
790    let header_mime = headers
791        .get(CONTENT_TYPE)
792        .and_then(|v| v.to_str().ok())
793        .map(|v| v.split(';').next().unwrap_or(v).trim().to_string());
794    let mime = format
795        .map(|v| v.to_string())
796        .or(header_mime)
797        .or_else(|| mime_type_from_url(url))
798        .unwrap_or_else(|| "application/octet-stream".to_string());
799    let data = general_purpose::STANDARD.encode(bytes);
800    Ok((mime, data))
801}
802
803fn process_gemini_audio(input_audio: &ChatInputAudio) -> Result<Value> {
804    let format = if input_audio.format.starts_with("audio/") {
805        input_audio.format.clone()
806    } else {
807        format!("audio/{}", input_audio.format)
808    };
809    Ok(serde_json::json!({
810        "inline_data": {
811            "mime_type": format,
812            "data": input_audio.data
813        }
814    }))
815}
816
817fn process_gemini_file(file: &ChatFile, model: &str) -> Result<Value> {
818    let passed = file
819        .file_id
820        .as_ref()
821        .or(file.file_data.as_ref())
822        .ok_or_else(|| LiteLLMError::Config("file_id or file_data required".into()))?;
823    process_gemini_media_url(
824        passed,
825        file.format.as_deref(),
826        file.detail.as_deref(),
827        file.video_metadata.as_ref(),
828        model,
829    )
830}
831
832fn process_gemini_media(
833    image_url: &ChatImageUrl,
834    detail: Option<&str>,
835    video_metadata: Option<&Value>,
836    format: Option<&str>,
837    model: &str,
838) -> Result<Value> {
839    let (url, format) = match image_url {
840        ChatImageUrl::Url(url) => (url.as_str(), format),
841        ChatImageUrl::Object(obj) => (obj.url.as_str(), obj.format.as_deref().or(format)),
842    };
843    process_gemini_media_url(url, format, detail, video_metadata, model)
844}
845
846fn process_gemini_media_url(
847    url: &str,
848    format: Option<&str>,
849    detail: Option<&str>,
850    video_metadata: Option<&Value>,
851    _model: &str,
852) -> Result<Value> {
853    if url.starts_with("gs://") || url.starts_with("https://") || url.starts_with("http://") {
854        let mime_type = format
855            .map(|v| v.to_string())
856            .or_else(|| mime_type_from_url(url))
857            .ok_or_else(|| LiteLLMError::Config("missing media mime type".into()))?;
858        let mut part = serde_json::json!({
859            "file_data": { "mime_type": mime_type, "file_uri": url }
860        });
861        apply_gemini_metadata(&mut part, detail, video_metadata);
862        return Ok(part);
863    }
864
865    if let Some((media_type, data)) = parse_data_url(url, format) {
866        let mut part = serde_json::json!({
867            "inline_data": { "mime_type": media_type, "data": data }
868        });
869        apply_gemini_metadata(&mut part, detail, video_metadata);
870        return Ok(part);
871    }
872
873    Err(LiteLLMError::Config("unsupported gemini media url".into()))
874}
875
876fn apply_gemini_metadata(part: &mut Value, detail: Option<&str>, video_metadata: Option<&Value>) {
877    if let Some(detail) = detail {
878        if let Some(level) = detail_to_media_resolution(detail) {
879            if let Some(obj) = part.as_object_mut() {
880                obj.insert(
881                    "media_resolution".to_string(),
882                    serde_json::json!({ "level": level }),
883                );
884            }
885        }
886    }
887    if let Some(video_metadata) = video_metadata {
888        if let Some(obj) = part.as_object_mut() {
889            obj.insert("video_metadata".to_string(), video_metadata.clone());
890        }
891    }
892}
893
/// Maps a detail hint (`low`, `medium`, `high`, `ultra_high`) to its media resolution level.
///
/// Matching is exact and case-sensitive; any other value yields `None`.
fn detail_to_media_resolution(detail: &str) -> Option<&'static str> {
    const LEVELS: [(&str, &str); 4] = [
        ("low", "MEDIA_RESOLUTION_LOW"),
        ("medium", "MEDIA_RESOLUTION_MEDIUM"),
        ("high", "MEDIA_RESOLUTION_HIGH"),
        ("ultra_high", "MEDIA_RESOLUTION_ULTRA_HIGH"),
    ];
    LEVELS
        .iter()
        .find(|(name, _)| *name == detail)
        .map(|(_, level)| *level)
}
903
/// Splits a `data:` URL into `(media_type, data)`.
///
/// The media type is `override_format` when given; otherwise it is the text between `data:`
/// and the first `;` or `,`. When the URL declares no media type (for example
/// `data:;base64,...`), `application/octet-stream` is used. The data is everything after the
/// first comma, returned unchanged.
///
/// Returns `None` when `url` does not start with `data:` or contains no comma.
fn parse_data_url(url: &str, override_format: Option<&str>) -> Option<(String, String)> {
    let (meta, data) = url.strip_prefix("data:")?.split_once(',')?;
    let media_type = override_format.unwrap_or_else(|| {
        // `split` always yields a first item, so an absent media type shows up as "".
        let declared = meta.split(';').next().unwrap_or("");
        if declared.is_empty() {
            "application/octet-stream"
        } else {
            declared
        }
    });
    Some((media_type.to_string(), data.to_string()))
}
917
918fn mime_type_from_url(url: &str) -> Option<String> {
919    let path = url.split('?').next().unwrap_or(url);
920    MimeGuess::from_path(path)
921        .first_raw()
922        .map(|m| m.to_string())
923}
924
925fn extract_video_uri(resp: &Value) -> Option<String> {
926    let response = resp.get("response")?;
927    if let Some(uri) = response
928        .pointer("/generateVideoResponse/generatedSamples/0/video/uri")
929        .and_then(|v| v.as_str())
930    {
931        return Some(uri.to_string());
932    }
933    if let Some(uri) = response
934        .pointer("/generatedVideos/0/uri")
935        .and_then(|v| v.as_str())
936    {
937        return Some(uri.to_string());
938    }
939    if let Some(uri) = response.pointer("/videos/0/uri").and_then(|v| v.as_str()) {
940        return Some(uri.to_string());
941    }
942    None
943}