Skip to main content

entelix_core/codecs/
vertex_anthropic.rs

1//! `VertexAnthropicCodec` — IR ⇄ Anthropic Messages API as routed
2//! through Google Cloud Vertex AI's `:rawPredict` /
3//! `:streamRawPredict` endpoints.
4//!
5//! Wire-format reference:
6//! <https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/use-claude>
7//! and <https://platform.claude.com/docs/en/build-with-claude/claude-on-vertex-ai>.
8//!
9//! Two — and only two — wire-shape divergences from
10//! [`super::AnthropicMessagesCodec`]:
11//!
12//! 1. The model id rides in the URL (the `VertexTransport` path
13//!    component), so the request body must NOT carry a `model`
14//!    field. Direct-Anthropic does send `model` in the body.
15//! 2. `anthropic_version: "vertex-2023-10-16"` is required as a
16//!    body field (not the `anthropic-version` header used direct).
17//!
18//! Every other surface — system prompt, messages, tool config,
19//! cache_control blocks, extended-thinking, anthropic-beta headers,
20//! SSE event stream, rate-limit headers — is identical to direct
21//! Anthropic. The codec composes [`super::AnthropicMessagesCodec`]
22//! and rewrites the encoded body in those two narrow ways; the
23//! decode path is delegated unchanged because the response shape
24//! mirrors the direct Messages API exactly.
25
26use bytes::Bytes;
27use http::HeaderName;
28use serde_json::Value;
29
30use crate::codecs::AnthropicMessagesCodec;
31use crate::codecs::codec::{BoxByteStream, BoxDeltaStream, Codec, EncodedRequest};
32use crate::error::Result;
33use crate::ir::{Capabilities, ModelRequest, ModelResponse, ModelWarning, OutputStrategy};
34use crate::rate_limit::RateLimitSnapshot;
35
/// Vertex AI's required body marker selecting the Anthropic-on-Vertex
/// wire contract. Cannot be omitted; cannot be overridden — the
/// vendor pins this constant per the published partner-model spec.
///
/// This module writes the value into every encoded request body under
/// the `anthropic_version` key, in place of the `anthropic-version`
/// header that the direct Anthropic API uses.
pub const VERTEX_ANTHROPIC_VERSION: &str = "vertex-2023-10-16";
40
41/// Stateless codec for Anthropic Claude routed through GCP Vertex AI.
42#[derive(Clone, Copy, Debug, Default)]
43pub struct VertexAnthropicCodec {
44    inner: AnthropicMessagesCodec,
45}
46
47impl VertexAnthropicCodec {
48    /// Create a fresh codec instance.
49    #[must_use]
50    pub const fn new() -> Self {
51        Self {
52            inner: AnthropicMessagesCodec::new(),
53        }
54    }
55}
56
57impl Codec for VertexAnthropicCodec {
58    fn name(&self) -> &'static str {
59        "vertex-anthropic"
60    }
61
62    fn capabilities(&self, model: &str) -> Capabilities {
63        // Vertex Anthropic exposes the same feature surface as
64        // direct Anthropic for any given Claude model — capability
65        // routing therefore delegates wholesale.
66        self.inner.capabilities(model)
67    }
68
69    fn auto_output_strategy(&self, model: &str) -> OutputStrategy {
70        self.inner.auto_output_strategy(model)
71    }
72
73    fn encode(&self, request: &ModelRequest) -> Result<EncodedRequest> {
74        let mut encoded = self.inner.encode(request)?;
75        rewrite_for_vertex(&mut encoded, &request.model, false)?;
76        Ok(encoded)
77    }
78
79    fn encode_streaming(&self, request: &ModelRequest) -> Result<EncodedRequest> {
80        let mut encoded = self.inner.encode_streaming(request)?;
81        rewrite_for_vertex(&mut encoded, &request.model, true)?;
82        Ok(encoded)
83    }
84
85    fn decode_stream<'a>(
86        &'a self,
87        bytes: BoxByteStream<'a>,
88        warnings_in: Vec<ModelWarning>,
89    ) -> BoxDeltaStream<'a> {
90        // Vertex serves the same SSE event stream as direct
91        // Anthropic — the parser is unchanged.
92        self.inner.decode_stream(bytes, warnings_in)
93    }
94
95    fn decode(&self, body: &[u8], warnings_in: Vec<ModelWarning>) -> Result<ModelResponse> {
96        // Response shape is identical to direct Anthropic Messages.
97        self.inner.decode(body, warnings_in)
98    }
99
100    fn extract_rate_limit(&self, headers: &http::HeaderMap) -> Option<RateLimitSnapshot> {
101        // Vertex layers its own quota headers on top, but anything
102        // Anthropic-shaped that flows through is identical — defer
103        // to the direct codec's parser. (GCP-specific quota headers
104        // surface through `VertexTransport`'s own snapshot, so the
105        // codec stays vendor-agnostic.)
106        self.inner.extract_rate_limit(headers)
107    }
108}
109
110/// Apply the Vertex-Anthropic wire deltas to an encoded request the
111/// inner [`AnthropicMessagesCodec`] just produced:
112///
113/// - drop the `model` body field (Vertex routes by URL path),
114/// - inject `anthropic_version: "vertex-2023-10-16"`,
115/// - strip the `anthropic-version` header (Vertex carries the
116///   marker in the body instead),
117/// - rewrite the path to the publisher-model resource the
118///   `:rawPredict` / `:streamRawPredict` endpoints expect.
119///
120/// `anthropic-beta` headers are left in place — Vertex honours them
121/// for extended-thinking / computer-use / cache-control variants.
122///
123/// The emitted path is partial (`/publishers/anthropic/models/{model}:rawPredict`)
124/// — the GCP project + location prefix
125/// (`/v1/projects/{project}/locations/{location}`) is the
126/// `VertexTransport`'s responsibility because the codec is
127/// project-agnostic by contract (invariant 5 — codecs operate on
128/// neutral IR, transports own connection identity).
129fn rewrite_for_vertex(encoded: &mut EncodedRequest, model: &str, streaming: bool) -> Result<()> {
130    let mut body: Value = serde_json::from_slice(&encoded.body)?;
131    let Value::Object(ref mut obj) = body else {
132        return Err(crate::error::Error::invalid_request(
133            "VertexAnthropicCodec: AnthropicMessagesCodec produced a non-object body",
134        ));
135    };
136    obj.remove("model");
137    obj.insert(
138        "anthropic_version".to_owned(),
139        Value::String(VERTEX_ANTHROPIC_VERSION.to_owned()),
140    );
141    encoded.body = Bytes::from(serde_json::to_vec(&body)?);
142    encoded
143        .headers
144        .remove(HeaderName::from_static("anthropic-version"));
145
146    let action = if streaming {
147        "streamRawPredict"
148    } else {
149        "rawPredict"
150    };
151    encoded.path = format!("/publishers/anthropic/models/{model}:{action}");
152    Ok(())
153}
154
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::indexing_slicing)]
mod tests {
    use super::*;
    use crate::ir::{Message, ModelRequest};

    /// Parse an encoded request body as JSON, panicking if it is not.
    fn parse(body: &Bytes) -> Value {
        serde_json::from_slice::<Value>(body).expect("body must be JSON")
    }

    /// Minimal single-turn request shared by the encode tests.
    fn req() -> ModelRequest {
        ModelRequest {
            model: "claude-opus-4-7".into(),
            messages: vec![Message::user("hi")],
            max_tokens: Some(1024),
            ..ModelRequest::default()
        }
    }

    /// The body gains `anthropic_version` and loses `model`.
    #[test]
    fn encode_replaces_model_with_anthropic_version_in_body() {
        let encoded = VertexAnthropicCodec::new().encode(&req()).unwrap();
        let json = parse(&encoded.body);
        let fields = json.as_object().unwrap();
        assert_eq!(fields["anthropic_version"], "vertex-2023-10-16");
        assert!(
            !fields.contains_key("model"),
            "Vertex routes by URL path — `model` must NOT appear in body"
        );
    }

    /// Fields outside the two Vertex deltas pass through unchanged.
    #[test]
    fn encode_preserves_messages_and_max_tokens() {
        let encoded = VertexAnthropicCodec::new().encode(&req()).unwrap();
        let json = parse(&encoded.body);
        let first_message = &json["messages"][0];
        assert_eq!(first_message["role"], "user");
        assert_eq!(first_message["content"][0]["text"], "hi");
        assert_eq!(json["max_tokens"], 1024);
    }

    /// The direct-API version header must not reach Vertex.
    #[test]
    fn encode_strips_anthropic_version_header() {
        let encoded = VertexAnthropicCodec::new().encode(&req()).unwrap();
        let version_header = encoded.headers.get("anthropic-version");
        assert!(
            version_header.is_none(),
            "Vertex carries the version marker in the body — header must be stripped"
        );
    }

    /// The streaming encode path applies the same body rewrites.
    #[test]
    fn encode_streaming_applies_same_rewrites() {
        let encoded = VertexAnthropicCodec::new()
            .encode_streaming(&req())
            .unwrap();
        assert!(encoded.streaming);
        let json = parse(&encoded.body);
        assert_eq!(json["anthropic_version"], "vertex-2023-10-16");
        assert!(json.get("model").is_none());
    }

    /// A direct-Anthropic-shaped response decodes through the Vertex codec.
    #[test]
    fn decode_delegates_to_direct_anthropic_response_shape() {
        let response = serde_json::json!({
            "id": "msg_x",
            "model": "claude-opus-4-7",
            "stop_reason": "end_turn",
            "content": [{ "type": "text", "text": "Hello!" }],
            "usage": { "input_tokens": 4, "output_tokens": 1 }
        });
        let raw = response.to_string();
        let decoded = VertexAnthropicCodec::new()
            .decode(raw.as_bytes(), Vec::new())
            .unwrap();
        assert_eq!(decoded.id, "msg_x");
        assert_eq!(decoded.usage.input_tokens, 4);
        assert_eq!(decoded.usage.output_tokens, 1);
    }
}