Skip to main content

agent_sdk_providers/impls/
cloudflare_ai_gateway.rs

1//! Cloudflare AI Gateway provider implementation.
2//!
3//! Routes requests through [Cloudflare's AI Gateway](https://developers.cloudflare.com/ai-gateway/)
4//! using **provider-native proxy** endpoints, preserving all provider-specific
5//! features (prompt caching, extended thinking, adaptive thinking, etc.).
6//!
7//! Unlike the Unified API (`/compat`), the provider-native proxy keeps each
8//! provider's request/response format intact — the gateway only swaps the base
9//! URL and handles authentication via BYOK.
10//!
11//! # Why provider-native over unified?
12//!
13//! The unified endpoint translates everything to `OpenAI` Chat Completions
14//! format, which **loses** critical features:
15//! - Anthropic prompt caching (`cache_control: ephemeral`) — significant cost savings
16//! - Anthropic adaptive / budgeted thinking
17//! - Gemini `cachedContent` handles
18//! - Anthropic thought signatures for tool verification
19//!
20//! The provider-native proxy preserves all of these.
21//!
22//! # BYOK authentication
23//!
24//! Provider API keys are stored in the Cloudflare dashboard. At runtime only a
25//! Cloudflare API token is needed — no provider secrets in code.
26//!
27//! # Example
28//!
29//! ```ignore
30//! use agent_sdk::providers::CloudflareAIGatewayProvider;
31//!
32//! // BYOK — CF token is the only secret at runtime
33//! let provider = CloudflareAIGatewayProvider::anthropic_sonnet(
34//!     "your-cf-api-token",
35//!     "your-cf-account-id",
36//!     "your-gateway-id",
37//! );
38//!
39//! // Pass-through — provider key at runtime, optional gateway auth
40//! let provider = CloudflareAIGatewayProvider::anthropic(
41//!     "your-anthropic-key".to_string(),
42//!     "your-cf-account-id",
43//!     "your-gateway-id",
44//!     "claude-sonnet-4-6".to_string(),
45//! ).with_gateway_token("your-cf-api-token");
46//! ```
47
48use crate::impls::anthropic::AnthropicProvider;
49use crate::impls::gemini::GeminiProvider;
50use crate::impls::openai::OpenAIProvider;
51use crate::model_capabilities::ModelCapabilities;
52use crate::provider::LlmProvider;
53use crate::streaming::StreamBox;
54use agent_sdk_foundation::llm::{ChatOutcome, ChatRequest, ThinkingConfig};
55use anyhow::Result;
56use async_trait::async_trait;
57
/// Root of every Cloudflare AI Gateway URL; the account id, gateway id and
/// provider segment are appended to it as path components.
const GATEWAY_BASE_URL: &str = "https://gateway.ai.cloudflare.com/v1";
/// Name of the HTTP header that carries the Cloudflare gateway token.
const CF_AIG_AUTH_HEADER: &str = "cf-aig-authorization";
60
61/// Upstream provider that the gateway routes to.
62#[derive(Clone)]
63enum Inner {
64    Anthropic(AnthropicProvider),
65    OpenAI(OpenAIProvider),
66    Gemini(GeminiProvider),
67}
68
69/// Cloudflare AI Gateway LLM provider.
70///
71/// Wraps an upstream provider (Anthropic, `OpenAI`, or Gemini) and routes
72/// requests through the provider-native proxy endpoint, preserving all
73/// provider-specific features including prompt caching, extended thinking,
74/// and streaming.
75///
76/// The gateway provides analytics, caching, rate limiting, logging, and
77/// automatic fallback on top.
78#[derive(Clone)]
79pub struct CloudflareAIGatewayProvider {
80    inner: Inner,
81}
82
83fn gateway_base(account_id: &str, gateway_id: &str, provider_segment: &str) -> String {
84    format!("{GATEWAY_BASE_URL}/{account_id}/{gateway_id}/{provider_segment}")
85}
86
87fn byok_headers(cf_token: &str) -> Vec<(String, String)> {
88    vec![(CF_AIG_AUTH_HEADER.to_owned(), format!("Bearer {cf_token}"))]
89}
90
91impl CloudflareAIGatewayProvider {
92    // ========================================================================
93    // Anthropic (provider-native: /anthropic/v1/messages)
94    // ========================================================================
95
96    /// Route to any Anthropic model via the provider-native proxy.
97    ///
98    /// In **BYOK mode** pass an empty `api_key` and call
99    /// [`with_gateway_token`](Self::with_gateway_token).
100    /// In **pass-through mode** pass the Anthropic API key directly.
101    #[must_use]
102    pub fn anthropic(api_key: String, account_id: &str, gateway_id: &str, model: String) -> Self {
103        let base_url = gateway_base(account_id, gateway_id, "anthropic");
104        let inner = AnthropicProvider::new(api_key, model).with_base_url(base_url);
105        Self {
106            inner: Inner::Anthropic(inner),
107        }
108    }
109
110    /// Route to Claude Sonnet 4.6 — BYOK mode (CF token only).
111    #[must_use]
112    pub fn anthropic_sonnet(cf_token: &str, account_id: &str, gateway_id: &str) -> Self {
113        Self::anthropic(
114            String::new(),
115            account_id,
116            gateway_id,
117            "claude-sonnet-4-6".to_owned(),
118        )
119        .with_gateway_token(cf_token)
120    }
121
122    /// Route to Claude Opus 4.6 — BYOK mode (CF token only).
123    #[must_use]
124    pub fn anthropic_opus(cf_token: &str, account_id: &str, gateway_id: &str) -> Self {
125        Self::anthropic(
126            String::new(),
127            account_id,
128            gateway_id,
129            "claude-opus-4-6".to_owned(),
130        )
131        .with_gateway_token(cf_token)
132    }
133
134    // ========================================================================
135    // OpenAI (provider-native: /openai/chat/completions)
136    // ========================================================================
137
138    /// Route to any `OpenAI` model via the provider-native proxy.
139    #[must_use]
140    pub fn openai(api_key: String, account_id: &str, gateway_id: &str, model: String) -> Self {
141        let base_url = gateway_base(account_id, gateway_id, "openai");
142        let inner = OpenAIProvider::with_base_url(api_key, model, base_url);
143        Self {
144            inner: Inner::OpenAI(inner),
145        }
146    }
147
148    /// Route to GPT-5.4 — BYOK mode.
149    #[must_use]
150    pub fn openai_gpt54(cf_token: &str, account_id: &str, gateway_id: &str) -> Self {
151        Self::openai(String::new(), account_id, gateway_id, "gpt-5.4".to_owned())
152            .with_gateway_token(cf_token)
153    }
154
155    /// Route to GPT-5.4 Mini — BYOK mode.
156    #[must_use]
157    pub fn openai_gpt54_mini(cf_token: &str, account_id: &str, gateway_id: &str) -> Self {
158        Self::openai(
159            String::new(),
160            account_id,
161            gateway_id,
162            "gpt-5.4-mini".to_owned(),
163        )
164        .with_gateway_token(cf_token)
165    }
166
167    /// Route to GPT-5.4 Nano — BYOK mode.
168    #[must_use]
169    pub fn openai_gpt54_nano(cf_token: &str, account_id: &str, gateway_id: &str) -> Self {
170        Self::openai(
171            String::new(),
172            account_id,
173            gateway_id,
174            "gpt-5.4-nano".to_owned(),
175        )
176        .with_gateway_token(cf_token)
177    }
178
179    // ========================================================================
180    // Gemini (provider-native: /google-ai-studio/v1beta/models/...)
181    // ========================================================================
182
183    /// Route to any Gemini model via the provider-native proxy.
184    ///
185    /// Automatically switches to header-based auth (`x-goog-api-key`) as
186    /// required by the gateway.
187    #[must_use]
188    pub fn gemini(api_key: String, account_id: &str, gateway_id: &str, model: String) -> Self {
189        let base_url = gateway_base(account_id, gateway_id, "google-ai-studio/v1beta");
190        let inner = GeminiProvider::new(api_key, model)
191            .with_base_url(base_url)
192            .with_header_auth();
193        Self {
194            inner: Inner::Gemini(inner),
195        }
196    }
197
198    /// Route to Gemini 3.1 Pro — BYOK mode.
199    #[must_use]
200    pub fn gemini_pro(cf_token: &str, account_id: &str, gateway_id: &str) -> Self {
201        Self::gemini(
202            String::new(),
203            account_id,
204            gateway_id,
205            "gemini-3.1-pro-preview".to_owned(),
206        )
207        .with_gateway_token(cf_token)
208    }
209
210    /// Route to Gemini 3 Flash — BYOK mode.
211    #[must_use]
212    pub fn gemini_flash(cf_token: &str, account_id: &str, gateway_id: &str) -> Self {
213        Self::gemini(
214            String::new(),
215            account_id,
216            gateway_id,
217            "gemini-3-flash-preview".to_owned(),
218        )
219        .with_gateway_token(cf_token)
220    }
221
222    // ========================================================================
223    // Configuration
224    // ========================================================================
225
226    /// Set the Cloudflare AI Gateway authentication token.
227    ///
228    /// Sent via the `cf-aig-authorization` header. In BYOK mode this is the
229    /// only auth needed. In pass-through mode it authenticates with the
230    /// gateway while the provider API key authenticates with the upstream.
231    #[must_use]
232    pub fn with_gateway_token(mut self, token: &str) -> Self {
233        let headers = byok_headers(token);
234        match &mut self.inner {
235            Inner::Anthropic(p) => {
236                *p = std::mem::replace(p, AnthropicProvider::new(String::new(), String::new()))
237                    .with_extra_headers(headers);
238            }
239            Inner::OpenAI(p) => {
240                *p = std::mem::replace(p, OpenAIProvider::new(String::new(), String::new()))
241                    .with_extra_headers(headers);
242            }
243            Inner::Gemini(p) => {
244                *p = std::mem::replace(p, GeminiProvider::new(String::new(), String::new()))
245                    .with_extra_headers(headers);
246            }
247        }
248        self
249    }
250
251    /// Set the provider-owned thinking configuration.
252    #[must_use]
253    pub fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
254        match &mut self.inner {
255            Inner::Anthropic(p) => {
256                *p = std::mem::replace(p, AnthropicProvider::new(String::new(), String::new()))
257                    .with_thinking(thinking);
258            }
259            Inner::OpenAI(p) => {
260                *p = std::mem::replace(p, OpenAIProvider::new(String::new(), String::new()))
261                    .with_thinking(thinking);
262            }
263            Inner::Gemini(p) => {
264                *p = std::mem::replace(p, GeminiProvider::new(String::new(), String::new()))
265                    .with_thinking(thinking);
266            }
267        }
268        self
269    }
270}
271
272#[async_trait]
273impl LlmProvider for CloudflareAIGatewayProvider {
274    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
275        match &self.inner {
276            Inner::Anthropic(p) => p.chat(request).await,
277            Inner::OpenAI(p) => p.chat(request).await,
278            Inner::Gemini(p) => p.chat(request).await,
279        }
280    }
281
282    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
283        match &self.inner {
284            Inner::Anthropic(p) => p.chat_stream(request),
285            Inner::OpenAI(p) => p.chat_stream(request),
286            Inner::Gemini(p) => p.chat_stream(request),
287        }
288    }
289
290    fn model(&self) -> &str {
291        match &self.inner {
292            Inner::Anthropic(p) => p.model(),
293            Inner::OpenAI(p) => p.model(),
294            Inner::Gemini(p) => p.model(),
295        }
296    }
297
298    fn provider(&self) -> &'static str {
299        "cloudflare-ai-gateway"
300    }
301
302    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
303        match &self.inner {
304            Inner::Anthropic(p) => p.configured_thinking(),
305            Inner::OpenAI(p) => p.configured_thinking(),
306            Inner::Gemini(p) => p.configured_thinking(),
307        }
308    }
309
310    fn capabilities(&self) -> Option<&'static ModelCapabilities> {
311        match &self.inner {
312            Inner::Anthropic(p) => p.capabilities(),
313            Inner::OpenAI(p) => p.capabilities(),
314            Inner::Gemini(p) => p.capabilities(),
315        }
316    }
317
318    fn structured_output_support(&self) -> crate::provider::StructuredOutputSupport {
319        match &self.inner {
320            Inner::Anthropic(p) => p.structured_output_support(),
321            Inner::OpenAI(p) => p.structured_output_support(),
322            Inner::Gemini(p) => p.structured_output_support(),
323        }
324    }
325
326    fn validate_thinking_config(&self, thinking: Option<&ThinkingConfig>) -> Result<()> {
327        match &self.inner {
328            Inner::Anthropic(p) => p.validate_thinking_config(thinking),
329            Inner::OpenAI(p) => p.validate_thinking_config(thinking),
330            Inner::Gemini(p) => p.validate_thinking_config(thinking),
331        }
332    }
333
334    fn default_max_tokens(&self) -> u32 {
335        match &self.inner {
336            Inner::Anthropic(p) => p.default_max_tokens(),
337            Inner::OpenAI(p) => p.default_max_tokens(),
338            Inner::Gemini(p) => p.default_max_tokens(),
339        }
340    }
341}
342
#[cfg(test)]
mod tests {
    use super::*;

    /// The Sonnet BYOK helper pins the model id and reports the gateway name.
    #[test]
    fn anthropic_sonnet_byok() {
        let provider = CloudflareAIGatewayProvider::anthropic_sonnet("cf-tok", "acct", "gw");
        assert_eq!(provider.model(), "claude-sonnet-4-6");
        assert_eq!(provider.provider(), "cloudflare-ai-gateway");
    }

    /// The Opus BYOK helper pins the model id.
    #[test]
    fn anthropic_opus_byok() {
        let provider = CloudflareAIGatewayProvider::anthropic_opus("cf-tok", "acct", "gw");
        assert_eq!(provider.model(), "claude-opus-4-6");
    }

    /// The GPT-5.4 BYOK helper pins the model id.
    #[test]
    fn openai_gpt54_byok() {
        let provider = CloudflareAIGatewayProvider::openai_gpt54("cf-tok", "acct", "gw");
        assert_eq!(provider.model(), "gpt-5.4");
    }

    /// The GPT-5.4 Mini BYOK helper pins the model id.
    #[test]
    fn openai_gpt54_mini_byok() {
        let provider = CloudflareAIGatewayProvider::openai_gpt54_mini("cf-tok", "acct", "gw");
        assert_eq!(provider.model(), "gpt-5.4-mini");
    }

    /// The GPT-5.4 Nano BYOK helper pins the model id.
    #[test]
    fn openai_gpt54_nano_byok() {
        let provider = CloudflareAIGatewayProvider::openai_gpt54_nano("cf-tok", "acct", "gw");
        assert_eq!(provider.model(), "gpt-5.4-nano");
    }

    /// The Gemini Pro BYOK helper pins the model id.
    #[test]
    fn gemini_pro_byok() {
        let provider = CloudflareAIGatewayProvider::gemini_pro("cf-tok", "acct", "gw");
        assert_eq!(provider.model(), "gemini-3.1-pro-preview");
    }

    /// The Gemini Flash BYOK helper pins the model id.
    #[test]
    fn gemini_flash_byok() {
        let provider = CloudflareAIGatewayProvider::gemini_flash("cf-tok", "acct", "gw");
        assert_eq!(provider.model(), "gemini-3-flash-preview");
    }

    /// Capabilities come from the Anthropic upstream, not from the gateway.
    #[test]
    fn capabilities_resolve_anthropic() {
        let provider = CloudflareAIGatewayProvider::anthropic_sonnet("t", "a", "g");
        let caps = provider.capabilities().unwrap();
        assert_eq!(caps.provider, "anthropic");
        assert_eq!(caps.model_id, "claude-sonnet-4-6");
        assert!(caps.supports_adaptive_thinking);
    }

    /// Capabilities come from the `OpenAI` upstream, not from the gateway.
    #[test]
    fn capabilities_resolve_openai() {
        let provider = CloudflareAIGatewayProvider::openai_gpt54("t", "a", "g");
        let caps = provider.capabilities().unwrap();
        assert_eq!(caps.provider, "openai");
        assert_eq!(caps.model_id, "gpt-5.4");
    }

    /// Capabilities come from the Gemini upstream, not from the gateway.
    #[test]
    fn capabilities_resolve_gemini() {
        let provider = CloudflareAIGatewayProvider::gemini_pro("t", "a", "g");
        let caps = provider.capabilities().unwrap();
        assert_eq!(caps.provider, "gemini");
    }

    /// Attaching a gateway token in pass-through mode keeps the model id.
    #[test]
    fn pass_through_with_gateway_token() {
        let provider = CloudflareAIGatewayProvider::anthropic(
            "sk-ant-key".to_string(),
            "acct",
            "gw",
            "claude-sonnet-4-6".to_string(),
        )
        .with_gateway_token("cf-tok");
        assert_eq!(provider.model(), "claude-sonnet-4-6");
    }

    /// A thinking config set on the gateway is visible through the trait.
    #[test]
    fn with_thinking_is_applied() {
        let provider = CloudflareAIGatewayProvider::anthropic_sonnet("t", "a", "g")
            .with_thinking(ThinkingConfig::adaptive());
        assert!(provider.configured_thinking().is_some());
    }

    /// Cloning yields a provider that reports the same model.
    #[test]
    fn provider_is_cloneable() {
        let original = CloudflareAIGatewayProvider::anthropic_sonnet("t", "a", "g");
        let copy = original.clone();
        assert_eq!(original.model(), copy.model());
    }

    /// The base URL is the gateway root followed by account, gateway and segment.
    #[test]
    fn gateway_url_format() {
        let url = gateway_base("my-acct", "my-gw", "anthropic");
        assert_eq!(
            url,
            "https://gateway.ai.cloudflare.com/v1/my-acct/my-gw/anthropic"
        );
    }
}