1use crate::config::ProviderConfig;
2use crate::error::{LiteLLMError, Result};
3use crate::http::send_json;
4use crate::providers::resolve_api_key;
5use crate::types::{
6 ChatContentPart, ChatContentPartFile, ChatContentPartImageUrl, ChatContentPartInputAudio,
7 ChatContentPartText, ChatFile, ChatImageUrl, ChatInputAudio, ChatMessage, ChatMessageContent,
8 ChatRequest, ChatResponse, ImageData, ImageEditRequest, ImageInputData, ImageRequest,
9 ImageResponse, Usage, VideoRequest, VideoResponse,
10};
11use base64::{engine::general_purpose, Engine as _};
12use mime_guess::MimeGuess;
13use reqwest::header::CONTENT_TYPE;
14use reqwest::Client;
15use serde_json::Value;
16use std::env;
17use tokio::time::{sleep, Duration};
18
/// Default value for `VideoGenerationOptions::max_poll_attempts`.
pub const DEFAULT_VIDEO_MAX_POLL_ATTEMPTS: u32 = 240;
/// Default value, in seconds, for `VideoGenerationOptions::poll_interval_secs`.
pub const DEFAULT_VIDEO_POLL_INTERVAL_SECS: u64 = 5;
23
24pub async fn chat(client: &Client, cfg: &ProviderConfig, req: ChatRequest) -> Result<ChatResponse> {
25 let base = cfg
26 .base_url
27 .clone()
28 .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
29 let key = resolve_api_key(cfg)?
30 .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
31 let model = req.model.trim_start_matches("models/");
32 let url = format!(
33 "{}/models/{}:generateContent",
34 base.trim_end_matches('/'),
35 model
36 );
37
38 let mut messages = req.messages;
39 let system_instruction = extract_system_instruction(&mut messages)?;
40 if messages.is_empty() && !system_instruction.is_empty() {
41 messages.push(ChatMessage {
42 role: "user".to_string(),
43 content: ChatMessageContent::Text(" ".to_string()),
44 name: None,
45 tool_call_id: None,
46 tool_calls: None,
47 function_call: None,
48 provider_specific_fields: None,
49 });
50 }
51
52 let contents = gemini_contents_from_messages(client, messages, model).await?;
53
54 let mut body = serde_json::json!({ "contents": contents });
55 if !system_instruction.is_empty() {
56 body["system_instruction"] = serde_json::json!({ "parts": system_instruction });
57 }
58
59 let mut generation_config = serde_json::Map::new();
61 if let Some(temp) = req.temperature {
62 generation_config.insert("temperature".to_string(), serde_json::json!(temp));
63 }
64 if let Some(max_tokens) = req.max_tokens {
65 generation_config.insert("maxOutputTokens".to_string(), serde_json::json!(max_tokens));
66 }
67 if !generation_config.is_empty() {
68 body["generationConfig"] = Value::Object(generation_config);
69 }
70
71 let mut builder = client.post(url).header("x-goog-api-key", key).json(&body);
72 for (k, v) in &cfg.extra_headers {
73 builder = builder.header(k, v);
74 }
75
76 let (resp, _headers) = send_json::<Value>(builder).await?;
77 let content = extract_text(&resp);
78 let usage = parse_usage(&resp);
79 let debug = env::var("LITELLM_GEMINI_DEBUG").ok().as_deref() == Some("1")
80 || env::var("YOETZ_GEMINI_DEBUG").ok().as_deref() == Some("1");
81 if debug {
82 if let Ok(pretty) = serde_json::to_string_pretty(&resp) {
83 eprintln!("litellm-rust gemini raw response:\n{pretty}");
84 }
85 }
86
87 Ok(ChatResponse {
88 content,
89 usage,
90 response_id: None,
91 header_cost: None,
92 raw: if debug { Some(resp) } else { None },
93 })
94}
95
/// Polling settings consumed by `video_generation_with_options`.
#[derive(Debug, Clone)]
pub struct VideoGenerationOptions {
    /// Maximum number of status polls issued before the call gives up.
    pub max_poll_attempts: u32,
    /// Number of seconds slept between consecutive status polls.
    pub poll_interval_secs: u64,
}
104
105impl Default for VideoGenerationOptions {
106 fn default() -> Self {
107 Self {
108 max_poll_attempts: DEFAULT_VIDEO_MAX_POLL_ATTEMPTS,
109 poll_interval_secs: DEFAULT_VIDEO_POLL_INTERVAL_SECS,
110 }
111 }
112}
113
/// Generates a video using the default polling settings.
///
/// Delegates to `video_generation_with_options` with `VideoGenerationOptions::default()`.
pub async fn video_generation(
    client: &Client,
    cfg: &ProviderConfig,
    req: VideoRequest,
) -> Result<VideoResponse> {
    video_generation_with_options(client, cfg, req, VideoGenerationOptions::default()).await
}
121
122pub async fn video_generation_with_options(
123 client: &Client,
124 cfg: &ProviderConfig,
125 req: VideoRequest,
126 options: VideoGenerationOptions,
127) -> Result<VideoResponse> {
128 let base = cfg
129 .base_url
130 .clone()
131 .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
132 let key = resolve_api_key(cfg)?
133 .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
134 let model = req.model.trim_start_matches("models/");
135
136 let url = format!(
137 "{}/models/{}:predictLongRunning",
138 base.trim_end_matches('/'),
139 model
140 );
141
142 let mut parameters = serde_json::Map::new();
143 if let Some(seconds) = req.seconds {
144 parameters.insert("durationSeconds".to_string(), serde_json::json!(seconds));
145 }
146 if let Some(size) = req.size {
147 parameters.insert("resolution".to_string(), serde_json::json!(size));
148 }
149
150 let body = serde_json::json!({
151 "instances": [{ "prompt": req.prompt }],
152 "parameters": Value::Object(parameters),
153 });
154
155 let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
156 for (k, v) in &cfg.extra_headers {
157 builder = builder.header(k, v);
158 }
159
160 let (resp, _headers) = send_json::<Value>(builder).await?;
161 let op_name = resp
162 .get("name")
163 .and_then(|v| v.as_str())
164 .ok_or_else(|| LiteLLMError::Parse("missing operation name".into()))?;
165
166 let op_url = if op_name.starts_with("http") {
167 op_name.to_string()
168 } else {
169 format!("{}/{}", base.trim_end_matches('/'), op_name)
170 };
171
172 let poll_interval = Duration::from_secs(options.poll_interval_secs);
173
174 for attempt in 0..options.max_poll_attempts {
175 let mut poll = client.get(&op_url).header("x-goog-api-key", &key);
176 for (k, v) in &cfg.extra_headers {
177 poll = poll.header(k, v);
178 }
179 let (op_resp, _headers) = send_json::<Value>(poll).await?;
180
181 if op_resp.get("done").and_then(|v| v.as_bool()) == Some(true) {
182 if op_resp.get("error").is_some() {
183 return Err(LiteLLMError::http("video generation failed"));
184 }
185
186 let uri = extract_video_uri(&op_resp)
187 .ok_or_else(|| LiteLLMError::Parse("missing video uri".into()))?;
188
189 return Ok(VideoResponse {
190 video_url: Some(uri),
191 raw: None,
192 });
193 }
194
195 if attempt + 1 >= options.max_poll_attempts {
196 return Err(LiteLLMError::http(format!(
197 "video generation timed out after {} attempts",
198 options.max_poll_attempts
199 )));
200 }
201 sleep(poll_interval).await;
202 }
203
204 Err(LiteLLMError::http("video generation timed out"))
205}
206
207pub async fn image_generation(
208 client: &Client,
209 cfg: &ProviderConfig,
210 req: ImageRequest,
211) -> Result<ImageResponse> {
212 let base = cfg
213 .base_url
214 .clone()
215 .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
216 let key = resolve_api_key(cfg)?
217 .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
218 let model = req.model.trim_start_matches("models/");
219 let n = req.n.unwrap_or(1);
220
221 if model.contains("gemini") {
222 let url = format!(
224 "{}/models/{}:generateContent",
225 base.trim_end_matches('/'),
226 model
227 );
228
229 let body = serde_json::json!({
230 "contents": [{ "parts": [{ "text": req.prompt }] }],
231 "generationConfig": { "response_modalities": ["IMAGE", "TEXT"] }
232 });
233
234 let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
235 for (k, v) in &cfg.extra_headers {
236 builder = builder.header(k, v);
237 }
238
239 let (resp, _headers) = send_json::<Value>(builder).await?;
240 let usage = parse_usage(&resp);
241 let images = extract_images_from_candidates(&resp);
242
243 Ok(ImageResponse {
244 images,
245 usage,
246 raw: Some(resp),
247 })
248 } else {
249 let url = format!("{}/models/{}:predict", base.trim_end_matches('/'), model);
251
252 let body = serde_json::json!({
253 "instances": [{ "prompt": req.prompt }],
254 "parameters": { "sampleCount": n }
255 });
256
257 let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
258 for (k, v) in &cfg.extra_headers {
259 builder = builder.header(k, v);
260 }
261
262 let (resp, _headers) = send_json::<Value>(builder).await?;
263 let images = extract_images_from_predictions(&resp);
264
265 Ok(ImageResponse {
266 images,
267 usage: Usage::default(),
268 raw: Some(resp),
269 })
270 }
271}
272
273pub async fn image_editing(
274 client: &Client,
275 cfg: &ProviderConfig,
276 req: ImageEditRequest,
277) -> Result<ImageResponse> {
278 let base = cfg
279 .base_url
280 .clone()
281 .ok_or_else(|| LiteLLMError::Config("base_url required".into()))?;
282 let key = resolve_api_key(cfg)?
283 .ok_or_else(|| LiteLLMError::MissingApiKey("GEMINI_API_KEY".into()))?;
284 let model = req.model.trim_start_matches("models/");
285
286 let url = format!(
287 "{}/models/{}:generateContent",
288 base.trim_end_matches('/'),
289 model
290 );
291
292 let mut parts = Vec::with_capacity(req.images.len() + 1);
293 parts.push(serde_json::json!({ "text": req.prompt }));
294 for image in &req.images {
295 parts.push(image_input_to_part(client, image).await?);
296 }
297
298 let body = serde_json::json!({
299 "contents": [{ "role": "user", "parts": parts }],
300 "generationConfig": {
301 "response_modalities": ["IMAGE", "TEXT"]
302 }
303 });
304
305 let mut builder = client.post(url).header("x-goog-api-key", &key).json(&body);
306 for (k, v) in &cfg.extra_headers {
307 builder = builder.header(k, v);
308 }
309
310 let (resp, _headers) = send_json::<Value>(builder).await?;
311 let usage = parse_usage(&resp);
312 let images = extract_images_from_candidates(&resp);
313
314 Ok(ImageResponse {
315 images,
316 usage,
317 raw: Some(resp),
318 })
319}
320
321async fn image_input_to_part(_client: &Client, input: &ImageInputData) -> Result<Value> {
322 if let Some(b64) = &input.b64_json {
323 let mime = input
324 .mime_type
325 .as_deref()
326 .unwrap_or("image/png")
327 .to_string();
328 return Ok(serde_json::json!({
329 "inline_data": { "mime_type": mime, "data": b64 }
330 }));
331 }
332 if let Some(url) = &input.url {
333 return process_gemini_media_url(url, input.mime_type.as_deref(), None, None, "");
334 }
335 Err(LiteLLMError::Config(
336 "ImageInputData requires b64_json or url".into(),
337 ))
338}
339
340fn extract_images_from_candidates(resp: &Value) -> Vec<ImageData> {
341 let mut images = Vec::new();
342 if let Some(candidates) = resp.get("candidates").and_then(|v| v.as_array()) {
343 for candidate in candidates {
344 if let Some(parts) = candidate
345 .get("content")
346 .and_then(|v| v.get("parts"))
347 .and_then(|v| v.as_array())
348 {
349 for part in parts {
350 if let Some(data) = part
351 .get("inlineData")
352 .and_then(|v| v.get("data"))
353 .and_then(|v| v.as_str())
354 {
355 let mime = part
356 .get("inlineData")
357 .and_then(|v| v.get("mimeType"))
358 .and_then(|v| v.as_str())
359 .map(|s| s.to_string());
360 images.push(ImageData {
361 b64_json: Some(data.to_string()),
362 url: None,
363 revised_prompt: None,
364 mime_type: mime,
365 });
366 }
367 }
368 }
369 }
370 }
371 images
372}
373
374fn extract_images_from_predictions(resp: &Value) -> Vec<ImageData> {
375 let mut images = Vec::new();
376 if let Some(predictions) = resp.get("predictions").and_then(|v| v.as_array()) {
377 for prediction in predictions {
378 if let Some(data) = prediction
379 .get("bytesBase64Encoded")
380 .and_then(|v| v.as_str())
381 {
382 let mime = prediction
383 .get("mimeType")
384 .and_then(|v| v.as_str())
385 .map(|s| s.to_string());
386 images.push(ImageData {
387 b64_json: Some(data.to_string()),
388 url: None,
389 revised_prompt: None,
390 mime_type: mime,
391 });
392 }
393 }
394 }
395 images
396}
397
398fn extract_text(resp: &Value) -> String {
399 if let Some(candidates) = resp.get("candidates").and_then(|v| v.as_array()) {
400 if let Some(first) = candidates.first() {
401 if let Some(parts) = first
402 .get("content")
403 .and_then(|v| v.get("parts"))
404 .and_then(|v| v.as_array())
405 {
406 let mut text = String::new();
407 for part in parts {
408 if let Some(piece) = part.get("text").and_then(|v| v.as_str()) {
409 text.push_str(piece);
410 }
411 }
412 return text;
413 }
414 }
415 }
416 String::new()
417}
418
419fn parse_usage(resp: &Value) -> Usage {
420 if let Some(meta) = resp.get("usageMetadata").and_then(|v| v.as_object()) {
421 return Usage {
422 prompt_tokens: meta.get("promptTokenCount").and_then(|v| v.as_u64()),
423 completion_tokens: meta.get("candidatesTokenCount").and_then(|v| v.as_u64()),
424 thoughts_tokens: meta.get("thoughtsTokenCount").and_then(|v| v.as_u64()),
425 total_tokens: meta.get("totalTokenCount").and_then(|v| v.as_u64()),
426 cost_usd: None,
427 };
428 }
429 Usage::default()
430}
431
432fn extract_system_instruction(messages: &mut Vec<ChatMessage>) -> Result<Vec<Value>> {
433 let mut parts = Vec::new();
434 let mut indices = Vec::new();
435 for (idx, msg) in messages.iter().enumerate() {
436 if msg.role == "system" {
437 match &msg.content {
438 ChatMessageContent::Text(text) => {
439 if !text.is_empty() {
440 parts.push(serde_json::json!({ "text": text }));
441 }
442 }
443 ChatMessageContent::Parts(content_parts) => {
444 let mut system_text = String::new();
445 for part in content_parts {
446 if let ChatContentPart::Text(ChatContentPartText { text, .. }) = part {
447 system_text.push_str(text);
448 }
449 }
450 if !system_text.is_empty() {
451 parts.push(serde_json::json!({ "text": system_text }));
452 }
453 }
454 }
455 indices.push(idx);
456 }
457 }
458 for idx in indices.into_iter().rev() {
459 messages.remove(idx);
460 }
461 Ok(parts)
462}
463
464async fn gemini_contents_from_messages(
465 client: &Client,
466 messages: Vec<ChatMessage>,
467 model: &str,
468) -> Result<Vec<Value>> {
469 let mut contents: Vec<Value> = Vec::new();
470 let mut msg_i = 0;
471 let mut tool_call_responses: Vec<Value> = Vec::new();
472 let mut last_tool_calls: Vec<ToolCallInfo> = Vec::new();
473
474 while msg_i < messages.len() {
475 let role = messages[msg_i].role.as_str();
476 if role == "user" {
477 let mut parts: Vec<Value> = Vec::new();
478 while msg_i < messages.len() && messages[msg_i].role == "user" {
479 parts.extend(
480 gemini_parts_from_content(client, &messages[msg_i].content, model).await?,
481 );
482 msg_i += 1;
483 }
484 if !parts.is_empty() {
485 contents.push(serde_json::json!({ "role": "user", "parts": parts }));
486 }
487 continue;
488 }
489
490 if role == "assistant" {
491 let mut parts: Vec<Value> = Vec::new();
492 while msg_i < messages.len() && messages[msg_i].role == "assistant" {
493 let message = &messages[msg_i];
494 parts.extend(gemini_parts_from_content(client, &message.content, model).await?);
495 let (tool_parts, tool_infos) = tool_call_parts_from_message(message)?;
496 if !tool_parts.is_empty() {
497 parts.extend(tool_parts);
498 if !tool_infos.is_empty() {
499 last_tool_calls = tool_infos;
500 }
501 }
502 msg_i += 1;
503 }
504 if !parts.is_empty() {
505 contents.push(serde_json::json!({ "role": "model", "parts": parts }));
506 }
507 continue;
508 }
509
510 if role == "tool" || role == "function" {
511 while msg_i < messages.len()
512 && (messages[msg_i].role == "tool" || messages[msg_i].role == "function")
513 {
514 let response_parts =
515 tool_response_parts(client, &messages[msg_i], &last_tool_calls).await?;
516 tool_call_responses.extend(response_parts);
517 msg_i += 1;
518 }
519 if !tool_call_responses.is_empty() {
520 contents.push(serde_json::json!({ "parts": tool_call_responses }));
521 tool_call_responses = Vec::new();
522 }
523 continue;
524 }
525
526 let parts = gemini_parts_from_content(client, &messages[msg_i].content, model).await?;
527 if !parts.is_empty() {
528 contents.push(serde_json::json!({ "role": "user", "parts": parts }));
529 }
530 msg_i += 1;
531 }
532
533 if !tool_call_responses.is_empty() {
534 contents.push(serde_json::json!({ "parts": tool_call_responses }));
535 }
536 if contents.is_empty() {
537 contents.push(serde_json::json!({
538 "role": "user",
539 "parts": [{ "text": " " }]
540 }));
541 }
542 Ok(contents)
543}
544
545async fn gemini_parts_from_content(
546 _client: &Client,
547 content: &ChatMessageContent,
548 model: &str,
549) -> Result<Vec<Value>> {
550 match content {
551 ChatMessageContent::Text(text) => Ok(vec![serde_json::json!({ "text": text })]),
552 ChatMessageContent::Parts(parts) => {
553 let mut out = Vec::new();
554 for part in parts {
555 match part {
556 ChatContentPart::Text(ChatContentPartText { text, .. }) => {
557 if !text.is_empty() {
558 out.push(serde_json::json!({ "text": text }));
559 }
560 }
561 ChatContentPart::ImageUrl(ChatContentPartImageUrl { image_url, .. }) => {
562 let detail = match image_url {
563 ChatImageUrl::Object(obj) => obj.detail.as_deref(),
564 ChatImageUrl::Url(_) => None,
565 };
566 out.push(process_gemini_media(image_url, detail, None, None, model)?);
567 }
568 ChatContentPart::InputAudio(ChatContentPartInputAudio {
569 input_audio, ..
570 }) => {
571 out.push(process_gemini_audio(input_audio)?);
572 }
573 ChatContentPart::File(ChatContentPartFile { file, .. }) => {
574 out.push(process_gemini_file(file, model)?);
575 }
576 ChatContentPart::Other(value) => {
577 return Err(LiteLLMError::Config(format!(
578 "unsupported gemini content part: {}",
579 value
580 )));
581 }
582 }
583 }
584 Ok(out)
585 }
586 }
587}
588
/// Identity of a tool call issued by an assistant message, kept so that a later tool
/// response can be matched back to the function name.
#[derive(Debug, Clone)]
struct ToolCallInfo {
    /// The tool call `id`, when the call carried one.
    id: Option<String>,
    /// Name of the function that was called.
    name: String,
}
594
595fn tool_call_parts_from_message(message: &ChatMessage) -> Result<(Vec<Value>, Vec<ToolCallInfo>)> {
596 let mut parts = Vec::new();
597 let mut infos = Vec::new();
598
599 if let Some(tool_calls) = &message.tool_calls {
600 if let Some(array) = tool_calls.as_array() {
601 for tool in array {
602 let function = tool
603 .get("function")
604 .ok_or_else(|| LiteLLMError::Config("tool_call missing function".into()))?;
605 let name = function
606 .get("name")
607 .and_then(|v| v.as_str())
608 .ok_or_else(|| LiteLLMError::Config("tool_call missing name".into()))?;
609 let args_raw = function
610 .get("arguments")
611 .and_then(|v| v.as_str())
612 .unwrap_or("");
613 let args = if args_raw.trim().is_empty() {
614 Value::Object(serde_json::Map::new())
615 } else {
616 serde_json::from_str(args_raw)
617 .map_err(|e| LiteLLMError::Parse(e.to_string()))?
618 };
619 parts.push(serde_json::json!({
620 "function_call": { "name": name, "args": args }
621 }));
622 let id = tool
623 .get("id")
624 .and_then(|v| v.as_str())
625 .map(|s| s.to_string());
626 infos.push(ToolCallInfo {
627 id,
628 name: name.to_string(),
629 });
630 }
631 return Ok((parts, infos));
632 }
633 }
634
635 if let Some(function_call) = &message.function_call {
636 let name = function_call
637 .get("name")
638 .and_then(|v| v.as_str())
639 .ok_or_else(|| LiteLLMError::Config("function_call missing name".into()))?;
640 let args_raw = function_call
641 .get("arguments")
642 .and_then(|v| v.as_str())
643 .unwrap_or("");
644 let args = if args_raw.trim().is_empty() {
645 Value::Object(serde_json::Map::new())
646 } else {
647 serde_json::from_str(args_raw).map_err(|e| LiteLLMError::Parse(e.to_string()))?
648 };
649 parts.push(serde_json::json!({
650 "function_call": { "name": name, "args": args }
651 }));
652 infos.push(ToolCallInfo {
653 id: None,
654 name: name.to_string(),
655 });
656 }
657
658 Ok((parts, infos))
659}
660
661async fn tool_response_parts(
662 client: &Client,
663 message: &ChatMessage,
664 last_tool_calls: &[ToolCallInfo],
665) -> Result<Vec<Value>> {
666 let name = resolve_tool_name(message, last_tool_calls)?;
667 let (content_str, mut inline_parts) = extract_tool_content(client, &message.content).await?;
668 let response = parse_tool_response_data(&content_str);
669 let function_part = serde_json::json!({
670 "function_response": { "name": name, "response": response }
671 });
672 let mut parts = vec![function_part];
673 parts.append(&mut inline_parts);
674 Ok(parts)
675}
676
677fn resolve_tool_name(message: &ChatMessage, last_tool_calls: &[ToolCallInfo]) -> Result<String> {
678 if let Some(name) = &message.name {
679 return Ok(name.clone());
680 }
681 if let Some(tool_call_id) = &message.tool_call_id {
682 if let Some(info) = last_tool_calls
683 .iter()
684 .find(|info| info.id.as_deref() == Some(tool_call_id.as_str()))
685 {
686 return Ok(info.name.clone());
687 }
688 }
689 Err(LiteLLMError::Config(
690 "missing tool name for tool response".into(),
691 ))
692}
693
694async fn extract_tool_content(
695 client: &Client,
696 content: &ChatMessageContent,
697) -> Result<(String, Vec<Value>)> {
698 let mut text = String::new();
699 let mut inline_parts = Vec::new();
700 match content {
701 ChatMessageContent::Text(t) => {
702 text.push_str(t);
703 }
704 ChatMessageContent::Parts(parts) => {
705 for part in parts {
706 match part {
707 ChatContentPart::Text(ChatContentPartText { text: t, .. }) => {
708 text.push_str(t);
709 }
710 ChatContentPart::ImageUrl(ChatContentPartImageUrl { image_url, .. }) => {
711 inline_parts.push(inline_data_from_image_url(client, image_url).await?);
712 }
713 ChatContentPart::InputAudio(ChatContentPartInputAudio {
714 input_audio, ..
715 }) => {
716 inline_parts.push(inline_data_from_audio(input_audio)?);
717 }
718 ChatContentPart::File(ChatContentPartFile { file, .. }) => {
719 inline_parts.push(inline_data_from_file(client, file).await?);
720 }
721 ChatContentPart::Other(_) => {}
722 }
723 }
724 }
725 }
726 Ok((text, inline_parts))
727}
728
729fn parse_tool_response_data(content: &str) -> Value {
730 let trimmed = content.trim();
731 if trimmed.starts_with('{') || trimmed.starts_with('[') {
732 if let Ok(value) = serde_json::from_str(trimmed) {
733 return value;
734 }
735 }
736 serde_json::json!({ "content": content })
737}
738
739async fn inline_data_from_image_url(client: &Client, image_url: &ChatImageUrl) -> Result<Value> {
740 let (url, format) = match image_url {
741 ChatImageUrl::Url(url) => (url.as_str(), None),
742 ChatImageUrl::Object(obj) => (obj.url.as_str(), obj.format.as_deref()),
743 };
744 inline_data_from_url(client, url, format).await
745}
746
747fn inline_data_from_audio(input_audio: &ChatInputAudio) -> Result<Value> {
748 let format = if input_audio.format.starts_with("audio/") {
749 input_audio.format.clone()
750 } else {
751 format!("audio/{}", input_audio.format)
752 };
753 Ok(serde_json::json!({
754 "inline_data": { "mime_type": format, "data": input_audio.data }
755 }))
756}
757
758async fn inline_data_from_file(client: &Client, file: &ChatFile) -> Result<Value> {
759 let passed = file
760 .file_id
761 .as_ref()
762 .or(file.file_data.as_ref())
763 .ok_or_else(|| LiteLLMError::Config("file_id or file_data required".into()))?;
764 inline_data_from_url(client, passed, file.format.as_deref()).await
765}
766
767async fn inline_data_from_url(client: &Client, url: &str, format: Option<&str>) -> Result<Value> {
768 if let Some((mime, data)) = parse_data_url(url, format) {
769 return Ok(serde_json::json!({
770 "inline_data": { "mime_type": mime, "data": data }
771 }));
772 }
773 if url.starts_with("http://") || url.starts_with("https://") {
774 let (mime, data) = fetch_bytes_with_mime(client, url, format).await?;
775 return Ok(serde_json::json!({
776 "inline_data": { "mime_type": mime, "data": data }
777 }));
778 }
779 Err(LiteLLMError::Config("unsupported inline data url".into()))
780}
781
782async fn fetch_bytes_with_mime(
783 client: &Client,
784 url: &str,
785 format: Option<&str>,
786) -> Result<(String, String)> {
787 let resp = client.get(url).send().await.map_err(LiteLLMError::from)?;
788 let headers = resp.headers().clone();
789 let bytes = resp.bytes().await.map_err(LiteLLMError::from)?;
790 let header_mime = headers
791 .get(CONTENT_TYPE)
792 .and_then(|v| v.to_str().ok())
793 .map(|v| v.split(';').next().unwrap_or(v).trim().to_string());
794 let mime = format
795 .map(|v| v.to_string())
796 .or(header_mime)
797 .or_else(|| mime_type_from_url(url))
798 .unwrap_or_else(|| "application/octet-stream".to_string());
799 let data = general_purpose::STANDARD.encode(bytes);
800 Ok((mime, data))
801}
802
803fn process_gemini_audio(input_audio: &ChatInputAudio) -> Result<Value> {
804 let format = if input_audio.format.starts_with("audio/") {
805 input_audio.format.clone()
806 } else {
807 format!("audio/{}", input_audio.format)
808 };
809 Ok(serde_json::json!({
810 "inline_data": {
811 "mime_type": format,
812 "data": input_audio.data
813 }
814 }))
815}
816
817fn process_gemini_file(file: &ChatFile, model: &str) -> Result<Value> {
818 let passed = file
819 .file_id
820 .as_ref()
821 .or(file.file_data.as_ref())
822 .ok_or_else(|| LiteLLMError::Config("file_id or file_data required".into()))?;
823 process_gemini_media_url(
824 passed,
825 file.format.as_deref(),
826 file.detail.as_deref(),
827 file.video_metadata.as_ref(),
828 model,
829 )
830}
831
832fn process_gemini_media(
833 image_url: &ChatImageUrl,
834 detail: Option<&str>,
835 video_metadata: Option<&Value>,
836 format: Option<&str>,
837 model: &str,
838) -> Result<Value> {
839 let (url, format) = match image_url {
840 ChatImageUrl::Url(url) => (url.as_str(), format),
841 ChatImageUrl::Object(obj) => (obj.url.as_str(), obj.format.as_deref().or(format)),
842 };
843 process_gemini_media_url(url, format, detail, video_metadata, model)
844}
845
846fn process_gemini_media_url(
847 url: &str,
848 format: Option<&str>,
849 detail: Option<&str>,
850 video_metadata: Option<&Value>,
851 _model: &str,
852) -> Result<Value> {
853 if url.starts_with("gs://") || url.starts_with("https://") || url.starts_with("http://") {
854 let mime_type = format
855 .map(|v| v.to_string())
856 .or_else(|| mime_type_from_url(url))
857 .ok_or_else(|| LiteLLMError::Config("missing media mime type".into()))?;
858 let mut part = serde_json::json!({
859 "file_data": { "mime_type": mime_type, "file_uri": url }
860 });
861 apply_gemini_metadata(&mut part, detail, video_metadata);
862 return Ok(part);
863 }
864
865 if let Some((media_type, data)) = parse_data_url(url, format) {
866 let mut part = serde_json::json!({
867 "inline_data": { "mime_type": media_type, "data": data }
868 });
869 apply_gemini_metadata(&mut part, detail, video_metadata);
870 return Ok(part);
871 }
872
873 Err(LiteLLMError::Config("unsupported gemini media url".into()))
874}
875
876fn apply_gemini_metadata(part: &mut Value, detail: Option<&str>, video_metadata: Option<&Value>) {
877 if let Some(detail) = detail {
878 if let Some(level) = detail_to_media_resolution(detail) {
879 if let Some(obj) = part.as_object_mut() {
880 obj.insert(
881 "media_resolution".to_string(),
882 serde_json::json!({ "level": level }),
883 );
884 }
885 }
886 }
887 if let Some(video_metadata) = video_metadata {
888 if let Some(obj) = part.as_object_mut() {
889 obj.insert("video_metadata".to_string(), video_metadata.clone());
890 }
891 }
892}
893
/// Maps a detail level (`low`, `medium`, `high`, `ultra_high`) to the corresponding
/// `MEDIA_RESOLUTION_*` name.
///
/// Matching is exact and case-sensitive; any other input yields `None`.
fn detail_to_media_resolution(detail: &str) -> Option<&'static str> {
    const LEVELS: [(&str, &str); 4] = [
        ("low", "MEDIA_RESOLUTION_LOW"),
        ("medium", "MEDIA_RESOLUTION_MEDIUM"),
        ("high", "MEDIA_RESOLUTION_HIGH"),
        ("ultra_high", "MEDIA_RESOLUTION_ULTRA_HIGH"),
    ];

    LEVELS
        .iter()
        .find(|(name, _)| *name == detail)
        .map(|(_, level)| *level)
}
903
/// Splits a `data:` URL into `(media_type, payload)`.
///
/// The media type is, in order of preference: `override_format`, the type declared before
/// the first `;` of the URL's metadata, or `application/octet-stream` when the URL
/// declares none (for example `data:;base64,...`). The payload is everything after the
/// first comma, returned verbatim.
///
/// Returns `None` when `url` does not start with `data:` or contains no comma.
fn parse_data_url(url: &str, override_format: Option<&str>) -> Option<(String, String)> {
    let (meta, data) = url.strip_prefix("data:")?.split_once(',')?;

    // `split` always yields a first segment, so an absent type shows up as an empty
    // string and has to be filtered for the default to apply.
    let declared = meta
        .split(';')
        .next()
        .map(str::trim)
        .filter(|media_type| !media_type.is_empty())
        .unwrap_or("application/octet-stream");
    let media_type = override_format.unwrap_or(declared);

    Some((media_type.to_string(), data.to_string()))
}
917
918fn mime_type_from_url(url: &str) -> Option<String> {
919 let path = url.split('?').next().unwrap_or(url);
920 MimeGuess::from_path(path)
921 .first_raw()
922 .map(|m| m.to_string())
923}
924
925fn extract_video_uri(resp: &Value) -> Option<String> {
926 let response = resp.get("response")?;
927 if let Some(uri) = response
928 .pointer("/generateVideoResponse/generatedSamples/0/video/uri")
929 .and_then(|v| v.as_str())
930 {
931 return Some(uri.to_string());
932 }
933 if let Some(uri) = response
934 .pointer("/generatedVideos/0/uri")
935 .and_then(|v| v.as_str())
936 {
937 return Some(uri.to_string());
938 }
939 if let Some(uri) = response.pointer("/videos/0/uri").and_then(|v| v.as_str()) {
940 return Some(uri.to_string());
941 }
942 None
943}