entelix_core/codecs/
gemini.rs

1//! `GeminiCodec` — IR ⇄ Google Gemini `generateContent` API
2//! (`POST /v1beta/models/{model}:generateContent`,
3//!   `POST /v1beta/models/{model}:streamGenerateContent?alt=sse`).
4//!
5//! Wire format reference:
6//! <https://ai.google.dev/api/rest/v1beta/models/generateContent>.
7//!
8//! Notable mappings:
9//!
10//! - IR `messages` → `contents: [{role: "user"|"model", parts: [...]}]`.
11//!   Gemini uses `"model"` for assistant turns.
12//! - IR `system: Option<String>` + IR `Role::System` → top-level
13//!   `systemInstruction: { parts: [{ text }] }`.
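//! - IR `Role::Tool` → `contents: [{role: "user", parts: [{
//!   functionResponse: { name, response: { ... } } }]}]`. Gemini
//!   correlates tool results by function `name`, so `tool_use_id` is
//!   not sent on the wire and no warning is recorded for it; a
//!   `LossyEncode` is recorded only for `is_error` (no wire flag) and
//!   for non-`ToolResult` parts found on a tool message.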
17//! - IR `ContentPart::ToolUse` → `parts: [{ functionCall: { name, args } }]`.
18//! - IR `tools` → `tools: [{ functionDeclarations: [...] }]`.
19//! - IR `tool_choice` → `toolConfig: { functionCallingConfig: { mode } }`.
20//! - Streaming SSE: `data: {...}\n\n` per chunk; each chunk is a full
21//!   `GenerateContentResponse` with delta text in `candidates[0].content`.
22
23#![allow(clippy::cast_possible_truncation)]
24
25use bytes::Bytes;
26use futures::StreamExt;
27use serde_json::{Map, Value, json};
28
29use crate::codecs::codec::{BoxByteStream, BoxDeltaStream, Codec, EncodedRequest};
30use crate::error::{Error, Result};
31use crate::ir::{
32    Capabilities, CitationSource, ContentPart, MediaSource, ModelRequest, ModelResponse,
33    ModelWarning, OutputStrategy, ProviderEchoSnapshot, ReasoningEffort, RefusalReason,
34    ResponseFormat, Role, SafetyCategory, SafetyLevel, SafetyRating, StopReason, ToolChoice,
35    ToolKind, ToolResultContent, Usage,
36};
37use crate::stream::StreamDelta;
38
/// Context-window size (in tokens) this codec reports through
/// `Capabilities::max_context_tokens`, regardless of the model name
/// passed to `capabilities`.
const DEFAULT_MAX_CONTEXT_TOKENS: u32 = 1_000_000;
40
41/// Provider key for [`GeminiCodec`] (and its [`super::VertexGeminiCodec`]
42/// composition wrapper) — identifies this vendor's entries in
43/// [`ProviderEchoSnapshot`]. The wire shape is identical across AI
44/// Studio and Vertex AI; only the routing differs.
45const PROVIDER_KEY: &str = "gemini";
46
47/// Wire field name Gemini uses for the opaque thought-signature
48/// round-trip token. Vertex AI strictly rejects camelCase
49/// (`thoughtSignature`) on encode and requires snake_case; AI Studio
50/// accepts both. Encoding always emits snake_case for portability.
51/// Decoder accepts both because Gemini servers have historically
52/// emitted both spellings depending on transport and version.
53const WIRE_THOUGHT_SIGNATURE: &str = "thought_signature";
54const WIRE_THOUGHT_SIGNATURE_LEGACY: &str = "thoughtSignature";
55
56/// Extract a Gemini `thought_signature` from a `Part` (or any JSON
57/// object that may carry it as a sibling field), accepting both
58/// snake_case and the legacy camelCase spelling. Returns the wrapping
59/// `ProviderEchoSnapshot` carrier ready to attach to a `ContentPart`.
60fn decode_thought_signature(obj: &Value) -> Option<ProviderEchoSnapshot> {
61    let sig = obj
62        .get(WIRE_THOUGHT_SIGNATURE)
63        .or_else(|| obj.get(WIRE_THOUGHT_SIGNATURE_LEGACY)) // silent-fallback-ok: snake_case + camelCase are both valid vendor spellings of the same field — accepting either is the contract, no default injected.
64        .and_then(Value::as_str)?;
65    Some(ProviderEchoSnapshot::for_provider(
66        PROVIDER_KEY,
67        WIRE_THOUGHT_SIGNATURE,
68        sig.to_owned(),
69    ))
70}
71
72/// Look up this codec's `thought_signature` payload from a part's
73/// echoes. Returns the raw signature string so the caller can stamp
74/// it onto the appropriate wire-format object.
75fn encode_thought_signature(echoes: &[ProviderEchoSnapshot]) -> Option<&str> {
76    ProviderEchoSnapshot::find_in(echoes, PROVIDER_KEY)
77        .and_then(|e| e.payload_str(WIRE_THOUGHT_SIGNATURE))
78}
79
/// Field-less, stateless codec for the Gemini `generateContent` family
/// of endpoints. Being a unit struct it is free to copy and carries no
/// configuration.
#[derive(Clone, Copy, Debug, Default)]
pub struct GeminiCodec;

impl GeminiCodec {
    /// Build a codec value. Usable in `const` contexts; yields the
    /// same unit value as `GeminiCodec::default()`.
    pub const fn new() -> Self {
        GeminiCodec
    }
}
90
91impl Codec for GeminiCodec {
92    fn name(&self) -> &'static str {
93        PROVIDER_KEY
94    }
95
96    fn capabilities(&self, _model: &str) -> Capabilities {
97        Capabilities {
98            streaming: true,
99            tools: true,
100            multimodal_image: true,
101            multimodal_audio: true,
102            multimodal_video: true,
103            multimodal_document: true,
104            system_prompt: true,
105            structured_output: true,
106            prompt_caching: true,
107            thinking: true,
108            citations: true,
109            web_search: true,
110            computer_use: false,
111            max_context_tokens: DEFAULT_MAX_CONTEXT_TOKENS,
112        }
113    }
114
115    fn encode(&self, request: &ModelRequest) -> Result<EncodedRequest> {
116        let (body, warnings) = build_body(request)?;
117        finalize_request(&request.model, &body, warnings, false)
118    }
119
120    fn encode_streaming(&self, request: &ModelRequest) -> Result<EncodedRequest> {
121        let (body, warnings) = build_body(request)?;
122        let mut encoded = finalize_request(&request.model, &body, warnings, true)?;
123        encoded.headers.insert(
124            http::header::ACCEPT,
125            http::HeaderValue::from_static("text/event-stream"),
126        );
127        Ok(encoded.into_streaming())
128    }
129
130    fn decode(&self, body: &[u8], warnings_in: Vec<ModelWarning>) -> Result<ModelResponse> {
131        let raw: Value = super::codec::parse_response_body(body, "Gemini")?;
132        let mut warnings = warnings_in;
133        let id = String::new(); // Gemini one-shot responses lack a top-level id
134        let model = str_field(&raw, "modelVersion").to_owned();
135        let mut usage = decode_usage(raw.get("usageMetadata"));
136        // Lift the candidate-scoped safetyRatings onto Usage so consumers
137        // see safety on a single canonical channel.
138        if let Some(candidate) = raw
139            .get("candidates")
140            .and_then(Value::as_array)
141            .and_then(|a| a.first())
142        {
143            usage.safety_ratings = decode_safety_ratings(candidate);
144        }
145        let (content, stop_reason) = decode_candidate(&raw, &mut warnings);
146        Ok(ModelResponse {
147            id,
148            model,
149            stop_reason,
150            content,
151            usage,
152            rate_limit: None,
153            warnings,
154            provider_echoes: Vec::new(),
155        })
156    }
157
158    fn decode_stream<'a>(
159        &'a self,
160        bytes: BoxByteStream<'a>,
161        warnings_in: Vec<ModelWarning>,
162    ) -> BoxDeltaStream<'a> {
163        Box::pin(stream_gemini(bytes, warnings_in))
164    }
165}
166
167// ── body builders ──────────────────────────────────────────────────────────
168
169fn build_body(request: &ModelRequest) -> Result<(Value, Vec<ModelWarning>)> {
170    if request.messages.is_empty() && request.system.is_empty() {
171        return Err(Error::invalid_request(
172            "Gemini generateContent requires at least one message",
173        ));
174    }
175    let mut warnings = Vec::new();
176    let (system_text, contents) = encode_messages(request, &mut warnings);
177
178    let mut body = Map::new();
179    body.insert("contents".into(), Value::Array(contents));
180    if let Some(text) = system_text {
181        body.insert(
182            "systemInstruction".into(),
183            json!({ "parts": [{ "text": text }] }),
184        );
185    }
186
187    let mut generation_config = Map::new();
188    if let Some(t) = request.max_tokens {
189        generation_config.insert("maxOutputTokens".into(), json!(t));
190    }
191    if let Some(t) = request.temperature {
192        generation_config.insert("temperature".into(), json!(t));
193    }
194    if let Some(p) = request.top_p {
195        generation_config.insert("topP".into(), json!(p));
196    }
197    if let Some(k) = request.top_k {
198        generation_config.insert("topK".into(), json!(k));
199    }
200    if !request.stop_sequences.is_empty() {
201        generation_config.insert("stopSequences".into(), json!(request.stop_sequences));
202    }
203    if let Some(format) = &request.response_format {
204        encode_gemini_structured_output(format, &mut generation_config, &mut body, &mut warnings)?;
205    }
206    if let Some(effort) = &request.reasoning_effort {
207        encode_gemini_thinking(
208            &request.model,
209            effort,
210            &mut generation_config,
211            &mut warnings,
212        );
213    }
214    if !generation_config.is_empty() {
215        body.insert("generationConfig".into(), Value::Object(generation_config));
216    }
217    if !request.tools.is_empty() {
218        body.insert("tools".into(), encode_tools(&request.tools, &mut warnings));
219        body.insert(
220            "toolConfig".into(),
221            encode_tool_choice(&request.tool_choice),
222        );
223    }
224    apply_provider_extensions(request, &mut body, &mut warnings);
225    Ok((Value::Object(body), warnings))
226}
227
228/// Read [`crate::ir::GeminiExt`] and merge each set field into the
229/// wire body. `candidate_count` lands inside `generationConfig`,
230/// creating the map if `build_body` did not already emit one.
231/// Foreign-vendor extensions surface as
232/// [`ModelWarning::ProviderExtensionIgnored`].
233fn apply_provider_extensions(
234    request: &ModelRequest,
235    body: &mut Map<String, Value>,
236    warnings: &mut Vec<ModelWarning>,
237) {
238    let ext = &request.provider_extensions;
239    // Gemini has no native parallel-tool toggle on the
240    // generateContent surface. Surface the lossy snap so the operator
241    // sees their `parallel_tool_calls` setting was dropped on the
242    // wire instead of debugging a silently-ignored knob.
243    if request.parallel_tool_calls.is_some() {
244        warnings.push(ModelWarning::LossyEncode {
245            field: "parallel_tool_calls".into(),
246            detail: "Gemini exposes no parallel-tool toggle — setting dropped".into(),
247        });
248    }
249    if let Some(gemini) = &ext.gemini {
250        if !gemini.safety_settings.is_empty() {
251            let arr: Vec<Value> = gemini
252                .safety_settings
253                .iter()
254                .map(|o| {
255                    json!({
256                        "category": o.category,
257                        "threshold": o.threshold,
258                    })
259                })
260                .collect();
261            body.insert("safetySettings".into(), Value::Array(arr));
262        }
263        if let Some(n) = gemini.candidate_count {
264            let entry = body
265                .entry("generationConfig")
266                .or_insert_with(|| Value::Object(Map::new()));
267            if let Some(map) = entry.as_object_mut() {
268                map.insert("candidateCount".into(), json!(n));
269            }
270        }
271        if let Some(name) = &gemini.cached_content {
272            body.insert("cachedContent".into(), Value::String(name.clone()));
273        }
274        if gemini.url_context.is_some() {
275            // Append the parameterless `url_context` tool to the
276            // request's tools array (allocate the array if it
277            // wasn't populated by `encode_tools`).
278            let entry = body
279                .entry("tools")
280                .or_insert_with(|| Value::Array(Vec::new()));
281            if let Some(arr) = entry.as_array_mut() {
282                arr.push(json!({ "url_context": {} }));
283            }
284        }
285    }
286    if let Some(seed) = request.seed {
287        let entry = body
288            .entry("generationConfig")
289            .or_insert_with(|| Value::Object(Map::new()));
290        if let Some(map) = entry.as_object_mut() {
291            map.insert("seed".into(), json!(seed));
292        }
293    }
294    if request.end_user_id.is_some() {
295        warnings.push(ModelWarning::LossyEncode {
296            field: "end_user_id".into(),
297            detail: "Gemini has no end-user attribution channel — drop the field".into(),
298        });
299    }
300    if ext.anthropic.is_some() {
301        warnings.push(ModelWarning::ProviderExtensionIgnored {
302            vendor: "anthropic".into(),
303        });
304    }
305    if ext.openai_chat.is_some() {
306        warnings.push(ModelWarning::ProviderExtensionIgnored {
307            vendor: "openai_chat".into(),
308        });
309    }
310    if ext.openai_responses.is_some() {
311        warnings.push(ModelWarning::ProviderExtensionIgnored {
312            vendor: "openai_responses".into(),
313        });
314    }
315    if ext.bedrock.is_some() {
316        warnings.push(ModelWarning::ProviderExtensionIgnored {
317            vendor: "bedrock".into(),
318        });
319    }
320}
321
322/// Resolve [`OutputStrategy`] and emit the Gemini native
323/// `responseJsonSchema` (Native) or a forced-tool surface (Tool).
324/// `Auto` resolves to `Native` — Gemini's `responseJsonSchema` is
325/// the most direct surface and Gemini 2.5+ always strict-validates.
326fn encode_gemini_structured_output(
327    format: &ResponseFormat,
328    generation_config: &mut Map<String, Value>,
329    body: &mut Map<String, Value>,
330    warnings: &mut Vec<ModelWarning>,
331) -> Result<()> {
332    let strategy = match format.strategy {
333        OutputStrategy::Auto | OutputStrategy::Native => OutputStrategy::Native,
334        explicit => explicit,
335    };
336    match strategy {
337        OutputStrategy::Native => {
338            generation_config.insert("responseMimeType".into(), json!("application/json"));
339            generation_config.insert(
340                "responseJsonSchema".into(),
341                format.json_schema.schema.clone(),
342            );
343            if !format.strict {
344                warnings.push(ModelWarning::LossyEncode {
345                    field: "response_format.strict".into(),
346                    detail: "Gemini always strict-validates structured output; \
347                         the strict=false request was approximated"
348                        .into(),
349                });
350            }
351        }
352        OutputStrategy::Tool => {
353            // Forced single function call. Gemini wraps tools as
354            // `tools[0].functionDeclarations[0]` and `toolConfig`
355            // narrows the selection; `mode: "ANY"` +
356            // `allowedFunctionNames: [name]` is the canonical
357            // forced-call shape.
358            let tool_name = format.json_schema.name.clone();
359            let synthetic_decl = json!({
360                "name": tool_name,
361                "description": format!(
362                    "Emit the response as a JSON object matching the {tool_name} schema."
363                ),
364                "parameters": format.json_schema.schema.clone(),
365            });
366            body.insert(
367                "tools".into(),
368                json!([{
369                    "functionDeclarations": [synthetic_decl],
370                }]),
371            );
372            body.insert(
373                "toolConfig".into(),
374                json!({
375                    "functionCallingConfig": {
376                        "mode": "ANY",
377                        "allowedFunctionNames": [format.json_schema.name],
378                    }
379                }),
380            );
381            if !format.strict {
382                warnings.push(ModelWarning::LossyEncode {
383                    field: "response_format.strict".into(),
384                    detail: "Gemini Tool-strategy structured output is always \
385                         schema-validated; strict=false was approximated"
386                        .into(),
387                });
388            }
389        }
390        OutputStrategy::Prompted => {
391            return Err(Error::invalid_request(
392                "OutputStrategy::Prompted is deferred to entelix 1.1; use \
393                 OutputStrategy::Native or OutputStrategy::Tool",
394            ));
395        }
396        OutputStrategy::Auto => unreachable!("Auto resolved above"),
397    }
398    Ok(())
399}
400
/// Report whether `model` belongs to the Gemini 3.x family, judged
/// purely by the `gemini-3` name prefix.
///
/// The family decides the thinking wire shape: 3.x takes a discrete
/// `thinkingLevel`, while 2.5 takes an integer `thinkingBudget`
/// (`-1` = auto, `0` = disable on Flash only). Prefix matching is used
/// because Gemini's API exposes no wire signal for which shape a
/// model accepts.
fn is_gemini_3(model: &str) -> bool {
    model.strip_prefix("gemini-3").is_some()
}
410
/// Report whether `model` is a Gemini 2.5 Flash variant, judged by
/// the `gemini-2.5-flash` name prefix.
///
/// Flash accepts `thinkingBudget: 0` to disable thinking; Pro cannot
/// disable. Flash-Lite names (`gemini-2.5-flash-lite…`) share this
/// prefix, so a single prefix test covers them too.
fn is_gemini_25_flash(model: &str) -> bool {
    model.starts_with("gemini-2.5-flash")
}
416
417/// Translate the cross-vendor [`ReasoningEffort`] knob onto
418/// Gemini's `generationConfig.thinkingConfig`. Per:
419///
420/// 2.5 (`thinkingBudget` integer):
421/// - `Off` → `0` (Flash only — Pro emits LossyEncode → `512`)
422/// - `Minimal` → `512`
423/// - `Low` → `1024`
424/// - `Medium` → `8192`
425/// - `High` → `24576`
426/// - `Auto` → `-1`
427/// - `VendorSpecific(s)` — `s` parses as decimal `thinkingBudget`;
428///   non-numeric emits LossyEncode → `Medium`.
429///
430/// 3.x (`thinkingLevel` enum):
431/// - `Off` → LossyEncode → `"minimal"` (Gemini 3 cannot disable)
432/// - `Minimal/Low/Medium/High` → `"minimal"/"low"/"medium"/"high"`
433/// - `Auto` → LossyEncode → `"high"` (no auto bucket)
434/// - `VendorSpecific(s)` — literal `thinkingLevel`.
435fn encode_gemini_thinking(
436    model: &str,
437    effort: &ReasoningEffort,
438    generation_config: &mut Map<String, Value>,
439    warnings: &mut Vec<ModelWarning>,
440) {
441    let mut thinking_config = Map::new();
442    if is_gemini_3(model) {
443        let level = match effort {
444            ReasoningEffort::Off => {
445                warnings.push(ModelWarning::LossyEncode {
446                    field: "reasoning_effort".into(),
447                    detail: "Gemini 3 cannot disable thinking — snapped to `\"minimal\"`".into(),
448                });
449                "minimal"
450            }
451            ReasoningEffort::Minimal => "minimal",
452            ReasoningEffort::Low => "low",
453            ReasoningEffort::Medium => "medium",
454            ReasoningEffort::High => "high",
455            ReasoningEffort::Auto => {
456                warnings.push(ModelWarning::LossyEncode {
457                    field: "reasoning_effort".into(),
458                    detail: "Gemini 3 has no `Auto` bucket — snapped to `\"high\"`".into(),
459                });
460                "high"
461            }
462            ReasoningEffort::VendorSpecific(literal) => {
463                thinking_config.insert("thinkingLevel".into(), Value::String(literal.clone()));
464                generation_config.insert("thinkingConfig".into(), Value::Object(thinking_config));
465                return;
466            }
467        };
468        thinking_config.insert("thinkingLevel".into(), Value::String(level.into()));
469    } else {
470        // Gemini 2.5 (default for any non-3.x prefix — falls
471        // through cleanly for 2.5 Pro / Flash / Flash-Lite).
472        let budget: i32 = match effort {
473            ReasoningEffort::Off => {
474                if is_gemini_25_flash(model) {
475                    0
476                } else {
477                    warnings.push(ModelWarning::LossyEncode {
478                        field: "reasoning_effort".into(),
479                        detail: format!(
480                            "Gemini 2.5 Pro ({model}) cannot disable thinking — snapped to `512`"
481                        ),
482                    });
483                    512
484                }
485            }
486            ReasoningEffort::Minimal => 512,
487            ReasoningEffort::Low => 1024,
488            ReasoningEffort::Medium => 8192,
489            ReasoningEffort::High => 24576,
490            ReasoningEffort::Auto => -1,
491            ReasoningEffort::VendorSpecific(literal) => {
492                if let Ok(parsed) = literal.parse::<i32>() {
493                    parsed
494                } else {
495                    warnings.push(ModelWarning::LossyEncode {
496                        field: "reasoning_effort".into(),
497                        detail: format!(
498                            "Gemini 2.5 vendor-specific reasoning_effort {literal:?} is not \
499                             a numeric thinkingBudget — falling through to `Medium`"
500                        ),
501                    });
502                    8192
503                }
504            }
505        };
506        thinking_config.insert("thinkingBudget".into(), json!(budget));
507    }
508    generation_config.insert("thinkingConfig".into(), Value::Object(thinking_config));
509}
510
511fn finalize_request(
512    model: &str,
513    body: &Value,
514    warnings: Vec<ModelWarning>,
515    streaming: bool,
516) -> Result<EncodedRequest> {
517    let bytes = serde_json::to_vec(body)?;
518    let path = if streaming {
519        format!("/v1beta/models/{model}:streamGenerateContent?alt=sse")
520    } else {
521        format!("/v1beta/models/{model}:generateContent")
522    };
523    let mut encoded = EncodedRequest::post_json(path, Bytes::from(bytes));
524    encoded.warnings = warnings;
525    Ok(encoded)
526}
527
528// ── encode helpers ─────────────────────────────────────────────────────────
529
530fn encode_messages(
531    request: &ModelRequest,
532    warnings: &mut Vec<ModelWarning>,
533) -> (Option<String>, Vec<Value>) {
534    let mut system_parts: Vec<String> = request
535        .system
536        .blocks()
537        .iter()
538        .map(|b| b.text.clone())
539        .collect();
540    if request.system.any_cached() {
541        warnings.push(ModelWarning::LossyEncode {
542            field: "system.cache_control".into(),
543            detail: "Gemini has no native prompt-cache control on \
544                     systemInstruction; block text is concatenated and \
545                     the cache directive is dropped"
546                .into(),
547        });
548    }
549    let mut contents = Vec::new();
550
551    for (idx, msg) in request.messages.iter().enumerate() {
552        match msg.role {
553            Role::System => {
554                let mut lossy_non_text = false;
555                let mut text = String::new();
556                for part in &msg.content {
557                    if let ContentPart::Text { text: t, .. } = part {
558                        text.push_str(t);
559                    } else {
560                        lossy_non_text = true;
561                    }
562                }
563                if lossy_non_text {
564                    warnings.push(ModelWarning::LossyEncode {
565                        field: format!("messages[{idx}].content"),
566                        detail: "non-text parts dropped from system message (Gemini routes \
567                                 system into systemInstruction)"
568                            .into(),
569                    });
570                }
571                if !text.is_empty() {
572                    system_parts.push(text);
573                }
574            }
575            Role::User => {
576                contents.push(json!({
577                    "role": "user",
578                    "parts": encode_user_parts(&msg.content, warnings, idx),
579                }));
580            }
581            Role::Assistant => {
582                contents.push(json!({
583                    "role": "model",
584                    "parts": encode_assistant_parts(&msg.content, warnings, idx),
585                }));
586            }
587            Role::Tool => {
588                contents.push(json!({
589                    "role": "user",
590                    "parts": encode_tool_response_parts(&msg.content, warnings, idx),
591                }));
592            }
593        }
594    }
595
596    let system_text = if system_parts.is_empty() {
597        None
598    } else {
599        Some(system_parts.join("\n\n"))
600    };
601    (system_text, contents)
602}
603
604fn encode_user_parts(
605    parts: &[ContentPart],
606    warnings: &mut Vec<ModelWarning>,
607    msg_idx: usize,
608) -> Vec<Value> {
609    let mut out = Vec::new();
610    for (part_idx, part) in parts.iter().enumerate() {
611        let path = || format!("messages[{msg_idx}].content[{part_idx}]");
612        match part {
613            ContentPart::Text { text, .. } => out.push(json!({ "text": text })),
614            ContentPart::Image { source, .. } => out.push(encode_media_gemini(source, "image/*")),
615            ContentPart::Audio { source, .. } => out.push(encode_media_gemini(source, "audio/wav")),
616            ContentPart::Video { source, .. } => out.push(encode_media_gemini(source, "video/mp4")),
617            ContentPart::Document { source, .. } => {
618                out.push(encode_media_gemini(source, "application/pdf"));
619            }
620            ContentPart::Thinking { .. } => warnings.push(ModelWarning::LossyEncode {
621                field: path(),
622                detail: "Gemini does not accept thinking blocks on input; block dropped".into(),
623            }),
624            ContentPart::Citation { .. } => warnings.push(ModelWarning::LossyEncode {
625                field: path(),
626                detail: "Gemini does not echo citations on input; block dropped".into(),
627            }),
628            ContentPart::ToolUse { .. } | ContentPart::ToolResult { .. } => {
629                warnings.push(ModelWarning::LossyEncode {
630                    field: path(),
631                    detail: "tool_use / tool_result not allowed on user role for Gemini".into(),
632                });
633            }
634            ContentPart::ImageOutput { .. } | ContentPart::AudioOutput { .. } => {
635                warnings.push(ModelWarning::LossyEncode {
636                    field: path(),
637                    detail: "Gemini does not accept assistant-produced image / audio output \
638                             as input — block dropped"
639                        .into(),
640                });
641            }
642            ContentPart::RedactedThinking { .. } => {
643                warnings.push(ModelWarning::LossyEncode {
644                    field: path(),
645                    detail: "Gemini does not accept redacted_thinking blocks; block dropped".into(),
646                });
647            }
648        }
649    }
650    out
651}
652
653fn encode_media_gemini(source: &MediaSource, fallback_mime: &str) -> Value {
654    match source {
655        MediaSource::Base64 { media_type, data } => json!({
656            "inlineData": { "mimeType": media_type, "data": data },
657        }),
658        MediaSource::Url { url, media_type } => {
659            let mime = media_type.as_deref().unwrap_or(fallback_mime); // silent-fallback-ok: caller-supplied fallback_mime is the typed MediaSource defaulting policy
660            json!({
661                "fileData": { "mimeType": mime, "fileUri": url },
662            })
663        }
664        MediaSource::FileId { id, media_type } => {
665            let mime = media_type.as_deref().unwrap_or(fallback_mime); // silent-fallback-ok: caller-supplied fallback_mime is the typed MediaSource defaulting policy
666            json!({
667                "fileData": { "mimeType": mime, "fileUri": id },
668            })
669        }
670    }
671}
672
673fn encode_assistant_parts(
674    parts: &[ContentPart],
675    warnings: &mut Vec<ModelWarning>,
676    msg_idx: usize,
677) -> Vec<Value> {
678    let mut out = Vec::new();
679    for (part_idx, part) in parts.iter().enumerate() {
680        let path = || format!("messages[{msg_idx}].content[{part_idx}]");
681        match part {
682            ContentPart::Text {
683                text,
684                provider_echoes,
685                ..
686            } => {
687                let mut o = Map::new();
688                o.insert("text".into(), Value::String(text.clone()));
689                if let Some(sig) = encode_thought_signature(provider_echoes) {
690                    o.insert(WIRE_THOUGHT_SIGNATURE.into(), Value::String(sig.to_owned()));
691                }
692                out.push(Value::Object(o));
693            }
694            ContentPart::ToolUse {
695                name,
696                input,
697                provider_echoes,
698                ..
699            } => {
700                // Gemini's wire shape uses the assistant-emitted function name
701                // as the round-trip key — there is no separate id field. The
702                // `tool_use_id` round-trip is preserved at the IR layer by
703                // letting the codec re-derive the id on decode from the same
704                // `name + args` shape.
705                let mut o = Map::new();
706                o.insert(
707                    "functionCall".into(),
708                    json!({ "name": name, "args": input }),
709                );
710                if let Some(sig) = encode_thought_signature(provider_echoes) {
711                    o.insert(WIRE_THOUGHT_SIGNATURE.into(), Value::String(sig.to_owned()));
712                }
713                out.push(Value::Object(o));
714            }
715            ContentPart::Thinking {
716                text,
717                provider_echoes,
718                ..
719            } => {
720                let mut o = Map::new();
721                o.insert("text".into(), Value::String(text.clone()));
722                o.insert("thought".into(), Value::Bool(true));
723                if let Some(sig) = encode_thought_signature(provider_echoes) {
724                    o.insert(WIRE_THOUGHT_SIGNATURE.into(), Value::String(sig.to_owned()));
725                }
726                out.push(Value::Object(o));
727            }
728            ContentPart::Citation { snippet, .. } => out.push(json!({ "text": snippet })),
729            other => {
730                warnings.push(ModelWarning::LossyEncode {
731                    field: path(),
732                    detail: format!(
733                        "{} not supported on model role for Gemini — dropped",
734                        debug_part_kind(other)
735                    ),
736                });
737            }
738        }
739    }
740    out
741}
742
743fn encode_tool_response_parts(
744    parts: &[ContentPart],
745    warnings: &mut Vec<ModelWarning>,
746    msg_idx: usize,
747) -> Vec<Value> {
748    let mut out = Vec::new();
749    for (part_idx, part) in parts.iter().enumerate() {
750        if let ContentPart::ToolResult {
751            tool_use_id: _,
752            name,
753            content,
754            is_error,
755            ..
756        } = part
757        {
758            let response_value = match content {
759                ToolResultContent::Json(v) => v.clone(),
760                ToolResultContent::Text(t) => json!({ "text": t }),
761            };
762            // Gemini's `functionResponse` keys correlation by
763            // `name`, not by id. The IR carries the original name on
764            // `ContentPart::ToolResult` precisely so this codec can
765            // emit it verbatim — no placeholder, no LossyEncode.
766            out.push(json!({
767                "functionResponse": {
768                    "name": name,
769                    "response": response_value,
770                },
771            }));
772            if *is_error {
773                warnings.push(ModelWarning::LossyEncode {
774                    field: format!("messages[{msg_idx}].content[{part_idx}].is_error"),
775                    detail: "Gemini has no functionResponse error flag — passing through content"
776                        .into(),
777                });
778            }
779        } else {
780            warnings.push(ModelWarning::LossyEncode {
781                field: format!("messages[{msg_idx}].content[{part_idx}]"),
782                detail: "non-tool_result part on Role::Tool dropped".into(),
783            });
784        }
785    }
786    out
787}
788
789fn encode_tools(tools: &[crate::ir::ToolSpec], warnings: &mut Vec<ModelWarning>) -> Value {
790    let mut declarations = Vec::new();
791    let mut tool_entries: Vec<Value> = Vec::new();
792    for (idx, t) in tools.iter().enumerate() {
793        match &t.kind {
794            ToolKind::Function { input_schema } => declarations.push(json!({
795                "name": t.name,
796                "description": t.description,
797                "parameters": input_schema,
798            })),
799            ToolKind::WebSearch { .. } => {
800                // Gemini's google_search built-in is parameterless — domain
801                // restrictions and use caps are not exposed on the wire.
802                tool_entries.push(json!({ "google_search": {} }));
803            }
804            ToolKind::CodeExecution => {
805                // Gemini's code_execution built-in: a sandboxed Python REPL
806                // the model invokes autonomously when a turn benefits from
807                // computation. Parameterless on the wire.
808                tool_entries.push(json!({ "code_execution": {} }));
809            }
810            // Anthropic / OpenAI vendor built-ins have no Gemini equivalent.
811            ToolKind::Computer { .. }
812            | ToolKind::TextEditor
813            | ToolKind::Bash
814            | ToolKind::FileSearch { .. }
815            | ToolKind::CodeInterpreter
816            | ToolKind::ImageGeneration
817            | ToolKind::McpConnector { .. }
818            | ToolKind::Memory => warnings.push(ModelWarning::LossyEncode {
819                field: format!("tools[{idx}]"),
820                detail: "Gemini natively ships google_search and code_execution — other \
821                         vendor built-ins (computer, text_editor, file_search, …) have no \
822                         Gemini equivalent; tool dropped"
823                    .into(),
824            }),
825        }
826    }
827    if !declarations.is_empty() {
828        tool_entries.insert(0, json!({ "functionDeclarations": declarations }));
829    }
830    Value::Array(tool_entries)
831}
832
833fn encode_tool_choice(choice: &ToolChoice) -> Value {
834    let mode = match choice {
835        ToolChoice::Auto => "AUTO",
836        // Gemini's "ANY" forces a tool call; for `Specific` we additionally
837        // narrow via `allowedFunctionNames` below.
838        ToolChoice::Required | ToolChoice::Specific { .. } => "ANY",
839        ToolChoice::None => "NONE",
840    };
841    let mut config = json!({ "functionCallingConfig": { "mode": mode } });
842    if let ToolChoice::Specific { name } = choice
843        && let Some(cfg) = config
844            .get_mut("functionCallingConfig")
845            .and_then(Value::as_object_mut)
846    {
847        cfg.insert("allowedFunctionNames".into(), json!([name]));
848    }
849    config
850}
851
852const fn debug_part_kind(part: &ContentPart) -> &'static str {
853    match part {
854        ContentPart::Text { .. } => "text",
855        ContentPart::Image { .. } => "image",
856        ContentPart::Audio { .. } => "audio",
857        ContentPart::Video { .. } => "video",
858        ContentPart::Document { .. } => "document",
859        ContentPart::Thinking { .. } => "thinking",
860        ContentPart::Citation { .. } => "citation",
861        ContentPart::ToolUse { .. } => "tool_use",
862        ContentPart::ToolResult { .. } => "tool_result",
863        ContentPart::ImageOutput { .. } => "image_output",
864        ContentPart::AudioOutput { .. } => "audio_output",
865        ContentPart::RedactedThinking { .. } => "redacted_thinking",
866    }
867}
868
869// ── decode helpers ─────────────────────────────────────────────────────────
870
871fn decode_candidate(
872    raw: &Value,
873    warnings: &mut Vec<ModelWarning>,
874) -> (Vec<ContentPart>, StopReason) {
875    let candidate = raw
876        .get("candidates")
877        .and_then(Value::as_array)
878        .and_then(|a| a.first())
879        .cloned()
880        .unwrap_or(Value::Null); // silent-fallback-ok: response with no candidates array → Null (downstream nested accessors propagate as None)
881    let parts_raw = candidate
882        .get("content")
883        .and_then(|c| c.get("parts"))
884        .and_then(Value::as_array)
885        .cloned()
886        .unwrap_or_default(); // silent-fallback-ok: candidate with no parts array → empty Vec (downstream loop iterates over zero items)
887    let mut parts = Vec::new();
888    // Per-response counter of `functionCall` parts seen so far —
889    // synthesized into the tool-use id so streaming and
890    // non-streaming decoders produce the same id sequence
891    // (`{name}#{tool_seq}`) for the same logical sequence of tool
892    // calls. Using the part-array index directly would diverge:
893    // non-streaming sees `[text, fnCall, text, fnCall]` at indices
894    // 1, 3 while streaming would emit them as 0, 1.
895    let mut tool_seq: usize = 0;
896    for (idx, part) in parts_raw.iter().enumerate() {
897        // Thinking blocks: parts marked `thought: true` carry reasoning text.
898        if part.get("thought").and_then(Value::as_bool) == Some(true) {
899            let text = str_field(part, "text").to_owned();
900            let provider_echoes = decode_thought_signature(part).map_or_else(Vec::new, |e| vec![e]);
901            parts.push(ContentPart::Thinking {
902                text,
903                cache_control: None,
904                provider_echoes,
905            });
906            continue;
907        }
908        if let Some(text) = part.get("text").and_then(Value::as_str)
909            && !text.is_empty()
910        {
911            // Plain `text` parts may also carry `thought_signature`
912            // on reasoning turns — preserve it for round-trip.
913            let provider_echoes = decode_thought_signature(part).map_or_else(Vec::new, |e| vec![e]);
914            parts.push(ContentPart::Text {
915                text: text.to_owned(),
916                cache_control: None,
917                provider_echoes,
918            });
919            continue;
920        }
921        if let Some(call) = part.get("functionCall") {
922            let name = str_field(call, "name").to_owned();
923            let args = call.get("args").cloned().unwrap_or_else(|| json!({})); // silent-fallback-ok: functionCall without args = empty-args call (vendor sometimes omits when schema has no required fields)
924            // `thought_signature` rides on the `Part` itself
925            // (sibling of `functionCall`), not inside the inner
926            // object — Gemini 3.x rejects the next turn with HTTP
927            // 400 if the first `functionCall` of a step is missing
928            // its echo.
929            let provider_echoes = decode_thought_signature(part).map_or_else(Vec::new, |e| vec![e]);
930            // Gemini does not round-trip a tool-use id — derive one
931            // from `(name, tool_seq)` where `tool_seq` is a per-
932            // response counter of function-call parts. Streaming
933            // decoder uses the identical counter, so the same
934            // logical sequence of tool calls produces the same id
935            // sequence regardless of code path.
936            parts.push(ContentPart::ToolUse {
937                id: format!("{name}#{tool_seq}"),
938                name,
939                input: args,
940                provider_echoes,
941            });
942            tool_seq = tool_seq.saturating_add(1);
943            continue;
944        }
945        warnings.push(ModelWarning::LossyEncode {
946            field: format!("candidates[0].content.parts[{idx}]"),
947            detail: "unknown Gemini part type dropped".into(),
948        });
949    }
950    // Grounding metadata → Citation parts.
951    if let Some(meta) = candidate.get("groundingMetadata")
952        && let Some(chunks) = meta.get("groundingChunks").and_then(Value::as_array)
953    {
954        for chunk in chunks {
955            if let Some(web) = chunk.get("web") {
956                let url = str_field(web, "uri").to_owned();
957                let title = web.get("title").and_then(Value::as_str).map(str::to_owned);
958                if !url.is_empty() {
959                    parts.push(ContentPart::Citation {
960                        snippet: title.clone().unwrap_or_default(), // silent-fallback-ok: grounding citation without title → snippet "" (the URL is the load-bearing pointer; title is purely descriptive)
961                        source: CitationSource::Url { url, title },
962                        cache_control: None,
963                        provider_echoes: Vec::new(),
964                    });
965                }
966            }
967        }
968    }
969    let stop_reason = decode_finish_reason(
970        candidate.get("finishReason").and_then(Value::as_str),
971        warnings,
972    );
973    (parts, stop_reason)
974}
975
976fn decode_finish_reason(reason: Option<&str>, warnings: &mut Vec<ModelWarning>) -> StopReason {
977    match reason {
978        Some("STOP") => StopReason::EndTurn,
979        Some("MAX_TOKENS") => StopReason::MaxTokens,
980        // Gemini distinguishes safety blocks from copyright
981        // (RECITATION) blocks — preserve the distinction in IR so
982        // dashboards can split by cause instead of collapsing both
983        // into a single refusal bucket.
984        Some("SAFETY") => StopReason::Refusal {
985            reason: RefusalReason::Safety,
986        },
987        Some("RECITATION") => StopReason::Refusal {
988            reason: RefusalReason::Recitation,
989        },
990        Some(other) => {
991            warnings.push(ModelWarning::UnknownStopReason {
992                raw: other.to_owned(),
993            });
994            StopReason::Other {
995                raw: other.to_owned(),
996            }
997        }
998        None => {
999            // Invariant #15 — silent EndTurn fallback was masking
1000            // truncated stream payloads from callers. Record as
1001            // Other + warning instead.
1002            warnings.push(ModelWarning::LossyEncode {
1003                field: "finishReason".into(),
1004                detail: "Gemini candidate carried no finishReason — \
1005                         IR records `Other{raw:\"missing\"}`"
1006                    .into(),
1007            });
1008            StopReason::Other {
1009                raw: "missing".to_owned(),
1010            }
1011        }
1012    }
1013}
1014
1015fn decode_usage(usage: Option<&Value>) -> Usage {
1016    // Cross-vendor `Usage::output_tokens` invariant — *total billable
1017    // output*. Gemini reports the visible-token slice in
1018    // `candidatesTokenCount` and bills thinking tokens separately
1019    // (vendor docs: "response pricing is the sum of output tokens
1020    // and thinking tokens"). OpenAI / Anthropic already include
1021    // their reasoning slices inside `output_tokens` / `completion_tokens`,
1022    // so the codec aligns Gemini to the same shape. `reasoning_tokens`
1023    // remains as an informational sub-counter operators can isolate
1024    // for thinking-only cost attribution.
1025    let visible = u_field(usage, "candidatesTokenCount");
1026    let thoughts = u_field(usage, "thoughtsTokenCount");
1027    Usage {
1028        input_tokens: u_field(usage, "promptTokenCount"),
1029        output_tokens: visible.saturating_add(thoughts),
1030        cached_input_tokens: u_field(usage, "cachedContentTokenCount"),
1031        cache_creation_input_tokens: 0,
1032        reasoning_tokens: thoughts,
1033        safety_ratings: Vec::new(),
1034    }
1035}
1036
1037fn decode_safety_ratings(candidate: &Value) -> Vec<SafetyRating> {
1038    let Some(raw) = candidate.get("safetyRatings").and_then(Value::as_array) else {
1039        return Vec::new();
1040    };
1041    raw.iter()
1042        .filter_map(|r| {
1043            let category = match r.get("category").and_then(Value::as_str)? {
1044                "HARM_CATEGORY_HARASSMENT" => SafetyCategory::Harassment,
1045                "HARM_CATEGORY_HATE_SPEECH" => SafetyCategory::HateSpeech,
1046                "HARM_CATEGORY_SEXUALLY_EXPLICIT" => SafetyCategory::SexuallyExplicit,
1047                "HARM_CATEGORY_DANGEROUS_CONTENT" => SafetyCategory::DangerousContent,
1048                other => SafetyCategory::Other(other.to_owned()),
1049            };
1050            let level = match r.get("probability").and_then(Value::as_str)? {
1051                "LOW" => SafetyLevel::Low,
1052                "MEDIUM" => SafetyLevel::Medium,
1053                "HIGH" => SafetyLevel::High,
1054                // `"NEGLIGIBLE"` and any unrecognised vendor label collapse
1055                // to the lowest bucket — the IR's four-bucket scale is the
1056                // canonical resolution.
1057                _ => SafetyLevel::Negligible,
1058            };
1059            Some(SafetyRating { category, level })
1060        })
1061        .collect()
1062}
1063
1064fn str_field<'a>(v: &'a Value, key: &str) -> &'a str {
1065    v.get(key).and_then(Value::as_str).unwrap_or("") // silent-fallback-ok: missing optional string field
1066}
1067
1068fn u_field(v: Option<&Value>, key: &str) -> u32 {
1069    v.and_then(|inner| inner.get(key))
1070        .and_then(Value::as_u64)
1071        .map_or(0, |n| u32::try_from(n).unwrap_or(u32::MAX)) // silent-fallback-ok: missing usage metric = 0 (vendor didn't report = unused); u64→u32 saturate
1072}
1073
1074// ── SSE streaming parser ───────────────────────────────────────────────────
1075
1076#[allow(tail_expr_drop_order, clippy::too_many_lines)]
1077fn stream_gemini(
1078    bytes: BoxByteStream<'_>,
1079    warnings_in: Vec<ModelWarning>,
1080) -> impl futures::Stream<Item = Result<StreamDelta>> + Send + '_ {
1081    async_stream::stream! {
1082        let mut bytes = bytes;
1083        let mut buf: Vec<u8> = Vec::new();
1084        let mut started = false;
1085        let mut warnings_emitted = false;
1086        let mut last_stop = StopReason::EndTurn;
1087        let mut current_tool_open = false;
1088        let mut tool_synth_idx: u64 = 0;
1089
1090        while let Some(chunk) = bytes.next().await {
1091            match chunk {
1092                Ok(b) => buf.extend_from_slice(&b),
1093                Err(e) => {
1094                    yield Err(e);
1095                    return;
1096                }
1097            }
1098            if !warnings_emitted {
1099                warnings_emitted = true;
1100                for w in &warnings_in {
1101                    yield Ok(StreamDelta::Warning(w.clone()));
1102                }
1103            }
1104            while let Some(pos) = find_double_newline(&buf) {
1105                let frame: Vec<u8> = buf.drain(..pos.saturating_add(2)).collect();
1106                let Ok(frame_str) = std::str::from_utf8(&frame) else {
1107                    continue;
1108                };
1109                let Some(payload) = parse_sse_data(frame_str) else {
1110                    continue;
1111                };
1112                let Ok(event) = serde_json::from_str::<Value>(&payload) else {
1113                    yield Err(Error::invalid_request(format!(
1114                        "Gemini stream: malformed chunk: {payload}"
1115                    )));
1116                    return;
1117                };
1118                if !started {
1119                    started = true;
1120                    let model = str_field(&event, "modelVersion").to_owned();
1121                    yield Ok(StreamDelta::Start {
1122                        id: String::new(),
1123                        model,
1124                        provider_echoes: Vec::new(),
1125                    });
1126                }
1127                if let Some(usage) = event.get("usageMetadata") {
1128                    yield Ok(StreamDelta::Usage(decode_usage(Some(usage))));
1129                }
1130                let Some(candidate) = event
1131                    .get("candidates")
1132                    .and_then(Value::as_array)
1133                    .and_then(|a| a.first())
1134                else {
1135                    continue;
1136                };
1137                if let Some(reason) = candidate.get("finishReason").and_then(Value::as_str) {
1138                    last_stop = decode_finish_reason(Some(reason), &mut Vec::new());
1139                }
1140                let Some(parts) = candidate
1141                    .get("content")
1142                    .and_then(|c| c.get("parts"))
1143                    .and_then(Value::as_array)
1144                else {
1145                    continue;
1146                };
1147                for part in parts {
1148                    // Thinking branches first so a `text` payload marked
1149                    // `thought: true` routes to ThinkingDelta rather than
1150                    // TextDelta.
1151                    if part.get("thought").and_then(Value::as_bool) == Some(true) {
1152                        if current_tool_open {
1153                            yield Ok(StreamDelta::ToolUseStop);
1154                            current_tool_open = false;
1155                        }
1156                        let text = part
1157                            .get("text")
1158                            .and_then(Value::as_str)
1159                            .unwrap_or("") // silent-fallback-ok: missing thinking text → empty body; downstream is_empty() guard suppresses the StreamDelta
1160                            .to_owned();
1161                        let provider_echoes =
1162                            decode_thought_signature(part).map_or_else(Vec::new, |e| vec![e]);
1163                        if !text.is_empty() || !provider_echoes.is_empty() {
1164                            yield Ok(StreamDelta::ThinkingDelta {
1165                                text,
1166                                provider_echoes,
1167                            });
1168                        }
1169                        continue;
1170                    }
1171                    if let Some(text) = part.get("text").and_then(Value::as_str)
1172                        && !text.is_empty()
1173                    {
1174                        if current_tool_open {
1175                            yield Ok(StreamDelta::ToolUseStop);
1176                            current_tool_open = false;
1177                        }
1178                        let provider_echoes =
1179                            decode_thought_signature(part).map_or_else(Vec::new, |e| vec![e]);
1180                        yield Ok(StreamDelta::TextDelta {
1181                            text: text.to_owned(),
1182                            provider_echoes,
1183                        });
1184                        continue;
1185                    }
1186                    if let Some(call) = part.get("functionCall") {
1187                        if current_tool_open {
1188                            yield Ok(StreamDelta::ToolUseStop);
1189                        }
1190                        let name = str_field(call, "name").to_owned();
1191                        let args = call.get("args").cloned().unwrap_or_else(|| json!({})); // silent-fallback-ok: streaming functionCall without args = empty-args call
1192                        let synth_id = format!("{name}#{tool_synth_idx}");
1193                        tool_synth_idx = tool_synth_idx.saturating_add(1);
1194                        let provider_echoes =
1195                            decode_thought_signature(part).map_or_else(Vec::new, |e| vec![e]);
1196                        yield Ok(StreamDelta::ToolUseStart {
1197                            id: synth_id,
1198                            name,
1199                            provider_echoes,
1200                        });
1201                        yield Ok(StreamDelta::ToolUseInputDelta {
1202                            partial_json: args.to_string(),
1203                        });
1204                        current_tool_open = true;
1205                    }
1206                }
1207            }
1208        }
1209        if current_tool_open {
1210            yield Ok(StreamDelta::ToolUseStop);
1211        }
1212        yield Ok(StreamDelta::Stop {
1213            stop_reason: last_stop,
1214        });
1215    }
1216}
1217
/// Locate the first SSE frame terminator in `buf`.
///
/// Returns the byte offset at which the earliest `"\n\n"` or `"\r\n\r\n"`
/// sequence begins, or `None` when neither occurs.
fn find_double_newline(buf: &[u8]) -> Option<usize> {
    (0..buf.len()).find(|&start| {
        buf.get(start..)
            .is_some_and(|tail| tail.starts_with(b"\n\n") || tail.starts_with(b"\r\n\r\n"))
    })
}
1228
/// Extract the `data:` payload from one SSE frame.
///
/// Every line beginning with `data:` contributes its remainder, minus at
/// most one leading space; multiple data lines are joined with `'\n'`.
/// Returns `None` when the frame contains no `data:` line at all.
fn parse_sse_data(frame: &str) -> Option<String> {
    let payload_lines: Vec<&str> = frame
        .lines()
        .filter_map(|line| line.strip_prefix("data:"))
        .map(|rest| rest.strip_prefix(' ').unwrap_or(rest)) // silent-fallback-ok: SSE data line may or may not have leading space; idiomatic strip-or-pass-through
        .collect();
    if payload_lines.is_empty() {
        None
    } else {
        Some(payload_lines.join("\n"))
    }
}
entelix_core/codecs/gemini.rs

entelix_core/codecs/
gemini.rs