Skip to main content

entelix_core/codecs/
vertex_gemini.rs

1//! `VertexGeminiCodec` — IR ⇄ Gemini API as routed through Google
2//! Cloud Vertex AI's publisher-model endpoints.
3//!
4//! Wire-format reference:
5//! <https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#syntax>.
6//!
7//! One — and only one — wire-shape divergence from
8//! [`super::GeminiCodec`]:
9//!
10//! 1. The model id rides in the URL path under
11//!    `/publishers/google/models/{model}`. The body shape is
12//!    identical between direct AI Studio Gemini and Vertex Gemini —
13//!    Gemini does not carry the model in the body, only the URL,
14//!    so encode delegates the body verbatim to the inner codec and
15//!    only rewrites the path.
16//!
17//! Every other surface — multimodal content parts, system
18//! instruction, tool config, function calling, structured output,
19//! thinking budget, SSE event stream, rate-limit headers — is
20//! identical to direct Gemini. The codec composes
21//! [`super::GeminiCodec`] and rewrites the encoded path; decode
22//! paths delegate unchanged because the response shape mirrors the
23//! direct generateContent / streamGenerateContent API exactly.
24//!
25//! The emitted path is partial
26//! (`/publishers/google/models/{model}:generateContent`) — the GCP
27//! project + location prefix
28//! (`/v1/projects/{project}/locations/{location}`) is the
29//! `VertexTransport`'s responsibility because the codec is
30//! project-agnostic by contract (invariant 5 — codecs operate on
31//! neutral IR, transports own connection identity).
32
33use crate::codecs::GeminiCodec;
34use crate::codecs::codec::{BoxByteStream, BoxDeltaStream, Codec, EncodedRequest};
35use crate::error::Result;
36use crate::ir::{Capabilities, ModelRequest, ModelResponse, ModelWarning, OutputStrategy};
37use crate::rate_limit::RateLimitSnapshot;
38
39/// Stateless codec for Google Gemini routed through GCP Vertex AI.
40#[derive(Clone, Copy, Debug, Default)]
41pub struct VertexGeminiCodec {
42    inner: GeminiCodec,
43}
44
45impl VertexGeminiCodec {
46    /// Create a fresh codec instance.
47    #[must_use]
48    pub const fn new() -> Self {
49        Self {
50            inner: GeminiCodec::new(),
51        }
52    }
53}
54
55impl Codec for VertexGeminiCodec {
56    fn name(&self) -> &'static str {
57        "vertex-gemini"
58    }
59
60    fn capabilities(&self, model: &str) -> Capabilities {
61        // Vertex Gemini exposes the same feature surface as direct
62        // Gemini for any given model — capability routing therefore
63        // delegates wholesale.
64        self.inner.capabilities(model)
65    }
66
67    fn auto_output_strategy(&self, model: &str) -> OutputStrategy {
68        self.inner.auto_output_strategy(model)
69    }
70
71    fn encode(&self, request: &ModelRequest) -> Result<EncodedRequest> {
72        let mut encoded = self.inner.encode(request)?;
73        rewrite_path_for_vertex(&mut encoded, &request.model, false);
74        Ok(encoded)
75    }
76
77    fn encode_streaming(&self, request: &ModelRequest) -> Result<EncodedRequest> {
78        let mut encoded = self.inner.encode_streaming(request)?;
79        rewrite_path_for_vertex(&mut encoded, &request.model, true);
80        Ok(encoded)
81    }
82
83    fn decode_stream<'a>(
84        &'a self,
85        bytes: BoxByteStream<'a>,
86        warnings_in: Vec<ModelWarning>,
87    ) -> BoxDeltaStream<'a> {
88        // Vertex serves the same SSE event stream as direct Gemini —
89        // the parser is unchanged.
90        self.inner.decode_stream(bytes, warnings_in)
91    }
92
93    fn decode(&self, body: &[u8], warnings_in: Vec<ModelWarning>) -> Result<ModelResponse> {
94        // Response shape is identical to direct Gemini.
95        self.inner.decode(body, warnings_in)
96    }
97
98    fn extract_rate_limit(&self, headers: &http::HeaderMap) -> Option<RateLimitSnapshot> {
99        // Vertex layers its own quota headers on top, but anything
100        // Gemini-shaped that flows through is identical — defer to
101        // the direct codec's parser.
102        self.inner.extract_rate_limit(headers)
103    }
104}
105
106/// Rewrite the path the inner [`GeminiCodec`] just produced
107/// (`/v1beta/models/{model}:{action}`) into the Vertex publisher-
108/// model partial path (`/publishers/google/models/{model}:{action}`)
109/// that `VertexTransport::resolve_url` then prefixes with the
110/// project + location segments.
111fn rewrite_path_for_vertex(encoded: &mut EncodedRequest, model: &str, streaming: bool) {
112    let action = if streaming {
113        "streamGenerateContent?alt=sse"
114    } else {
115        "generateContent"
116    };
117    encoded.path = format!("/publishers/google/models/{model}:{action}");
118}
119
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::indexing_slicing)]
mod tests {
    use super::*;
    use crate::ir::{Message, ModelRequest};

    /// Minimal single-turn request shared by every test in this module.
    fn sample_request() -> ModelRequest {
        ModelRequest {
            model: "gemini-3.1-pro".into(),
            messages: vec![Message::user("hi")],
            max_tokens: Some(16),
            ..Default::default()
        }
    }

    /// Non-streaming encode must target the `generateContent` action
    /// under the publisher-model partial path.
    #[test]
    fn encode_emits_publisher_partial_path() {
        let request = sample_request();
        let wire = VertexGeminiCodec::new().encode(&request).unwrap();
        assert_eq!(
            wire.path, "/publishers/google/models/gemini-3.1-pro:generateContent",
            "Vertex Gemini codec must emit the publisher-partial path so VertexTransport can prefix project + location"
        );
    }

    /// Streaming encode must flag the request as streaming and target
    /// the SSE variant of `streamGenerateContent`.
    #[test]
    fn encode_streaming_emits_publisher_partial_path_with_sse_alt() {
        let request = sample_request();
        let wire = VertexGeminiCodec::new()
            .encode_streaming(&request)
            .unwrap();
        assert!(wire.streaming);
        assert_eq!(
            wire.path,
            "/publishers/google/models/gemini-3.1-pro:streamGenerateContent?alt=sse",
        );
    }

    /// The request body must be exactly what the direct Gemini codec
    /// emits for the same request.
    #[test]
    fn encode_body_delegates_to_inner_unchanged() {
        let request = sample_request();
        let vertex_body = VertexGeminiCodec::new().encode(&request).unwrap().body;
        let direct_body = GeminiCodec::new().encode(&request).unwrap().body;
        assert_eq!(
            vertex_body, direct_body,
            "Vertex Gemini body shape is identical to direct Gemini — only the URL path differs"
        );
    }
}
167}