Skip to main content

axon/backends/
openrouter.rs

1//! OpenRouter (multi-provider gateway) backend — Fase 24.i.
2//!
3//! Thin factory + slug-aware capability override on top of
4//! [`OpenAICompatibleBackend`]. OpenRouter speaks OpenAI-compat verbatim
5//! (Bearer auth, `/v1/chat/completions`, OpenAI tool envelope) and
6//! routes requests to underlying providers based on the
7//! `provider/model` slug form (e.g. `openai/gpt-4o-mini`,
8//! `anthropic/claude-sonnet-4-5`, `google/gemini-2.5-flash`,
9//! `moonshot/kimi-k2.6`).
10//!
11//! What this module adds on top of the shared base:
12//!
13//!   * [`from_env`] / [`with_api_key`] factories that pin
14//!     [`OpenAICompatConfig::openrouter`] (base URL
15//!     `https://openrouter.ai/api`, default model
16//!     `openai/gpt-4o-mini`, env `OPENROUTER_API_KEY`).
17//!   * **Slug-aware Vision dispatch** — `Capability::Vision` consults
18//!     the underlying model name (e.g. `openai/gpt-4o-mini` → true
19//!     because gpt-4o family supports vision; `meta/llama-3.1-70b` →
20//!     false because Llama 3.1 is text-only). The dispatch matches
21//!     the per-provider rules from `openai.rs` / `gemini.rs` /
22//!     `anthropic.rs` / `glm.rs` / `ollama.rs` — adopters get
23//!     consistent `supports()` answers regardless of whether they
24//!     route through OpenRouter or call the provider directly.
25//!   * **Locked-model dispatch works on slug form** — the v1.16.2
26//!     `apply_sampling_params` machinery normalises slug-form names
27//!     (strips `provider/` prefix) before pattern matching, so
28//!     `openai/o1-mini` correctly strips the locked params. Confirmed
//!     by the tests `body_strips_locked_params_for_slug_form_o1` and
//!     `body_strips_locked_params_for_slug_form_kimi_k2`.
30//!   * **count_tokens slug-aware** — overridden to strip the
31//!     `provider/` prefix before delegating to the unified
32//!     [`tokens::count_tokens`] dispatch, so an adopter passing
33//!     `openai/gpt-4o-mini` gets the exact `o200k_base` count rather
34//!     than the 4-cpt fallback.
35//!
36//! # Example
37//!
38//! ```ignore
39//! use axon::backends::{openrouter, Backend, ChatRequest, Message};
40//!
41//! let backend = openrouter::from_env();
42//! let request = ChatRequest {
43//!     model: "anthropic/claude-sonnet-4-5".into(),
44//!     messages: vec![Message::user("Translate to Chinese: Hello, world!")],
45//!     ..Default::default()
46//! };
47//! let response = backend.complete(request).await?;
48//! ```
49
50use std::env;
51use std::pin::Pin;
52
53use async_trait::async_trait;
54use futures::Stream;
55
56use super::error::BackendError;
57use super::openai_compat::{OpenAICompatConfig, OpenAICompatibleBackend};
58use super::tokens;
59use super::{Backend, Capability, ChatRequest, ChatResponse, ChatStream};
60
61const API_KEY_ENV: &str = "OPENROUTER_API_KEY";
62
/// OpenRouter multi-provider gateway backend.
///
/// Composes [`OpenAICompatibleBackend`] with the OpenRouter preset
/// ([`OpenAICompatConfig::openrouter`]) and layers slug-aware capability
/// and token-counting overrides on top of it.
pub struct OpenRouterBackend {
    /// Shared OpenAI-compatible backend; everything this type does not
    /// override itself is delegated to it.
    inner: OpenAICompatibleBackend,
}
69
70impl OpenRouterBackend {
71    /// Construct from env. `OPENROUTER_API_KEY` is read at construction
72    /// time; `None` is permitted (auth check fires at first call).
73    pub fn from_env() -> Self {
74        Self::with_api_key(env::var(API_KEY_ENV).ok())
75    }
76
77    /// Construct with an explicit API key (or `None`).
78    pub fn with_api_key(api_key: Option<String>) -> Self {
79        Self {
80            inner: OpenAICompatibleBackend::new(
81                OpenAICompatConfig::openrouter(),
82                api_key,
83            ),
84        }
85    }
86
87    /// Override the base URL (test fixtures, regional OpenRouter
88    /// endpoints).
89    pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
90        self.inner = self.inner.with_base_url(base_url);
91        self
92    }
93
94    /// Override the default slug. Useful when an adopter pins a
95    /// specific model (e.g. `anthropic/claude-haiku-4-5` for cheap
96    /// drafting).
97    pub fn with_default_model(mut self, model: impl Into<String>) -> Self {
98        self.inner = self.inner.with_default_model(model);
99        self
100    }
101
102    /// Borrow the underlying [`OpenAICompatibleBackend`] (for testing
103    /// fixtures that need access to the composed inner state).
104    pub fn inner(&self) -> &OpenAICompatibleBackend {
105        &self.inner
106    }
107}
108
/// `Default` is equivalent to [`OpenRouterBackend::from_env`], so the
/// default value depends on the process environment at construction time.
impl Default for OpenRouterBackend {
    fn default() -> Self {
        Self::from_env()
    }
}
114
115#[async_trait]
116impl Backend for OpenRouterBackend {
117    fn name(&self) -> &str {
118        self.inner.name()
119    }
120
121    fn default_model(&self) -> &str {
122        self.inner.default_model()
123    }
124
125    async fn complete(&self, request: ChatRequest) -> Result<ChatResponse, BackendError> {
126        self.inner.complete(request).await
127    }
128
129    async fn stream(&self, request: ChatRequest) -> Result<ChatStream, BackendError> {
130        self.inner.stream(request).await
131    }
132
133    fn count_tokens(&self, model: &str, text: &str) -> usize {
134        // Strip the `provider/` prefix and delegate to the unified
135        // dispatch, so `openai/gpt-4o-mini` gets the exact
136        // `o200k_base` count and `moonshot/kimi-k2.6` gets the
137        // exact `cl100k_base` count instead of the 4-cpt fallback
138        // that an unrecognised slug would receive.
139        let underlying = strip_provider_prefix(model);
140        tokens::count_tokens(underlying, text).count
141    }
142
143    fn supports(&self, capability: Capability, model: &str) -> bool {
144        match capability {
145            Capability::Vision => slug_supports_vision(model),
146            // LockedParams = true iff the underlying slug matches a
147            // locked-model family (Kimi K2.x, OpenAI o1, OpenAI o3).
148            // The shared base already consults
149            // `locked_model::locked_params_for_model`, which 24.i
150            // updated to normalise slug-form names — so this returns
151            // the correct answer for `openai/o1-mini`,
152            // `moonshot/kimi-k2.6`, etc.
153            other => self.inner.supports(other, model),
154        }
155    }
156}
157
158// ────────────────────────────────────────────────────────────────────
159//  Slug helpers
160// ────────────────────────────────────────────────────────────────────
161
/// Drop the leading `provider/` segment of a slug-form model identifier.
///
/// Everything after the *first* `/` is returned, so
/// `openai/gpt-4o-mini` → `gpt-4o-mini` and `a/b/c` → `b/c`. An input
/// without any `/` is handed back untouched.
fn strip_provider_prefix(model: &str) -> &str {
    match model.find('/') {
        // `/` is a single byte, so `slash + 1` is always a char boundary.
        Some(slash) => &model[slash + 1..],
        None => model,
    }
}
168
/// Determine Vision support for an OpenRouter slug (`provider/model`).
///
/// Matching is case-insensitive. The slug is split at the first `/`
/// into a provider and a model name, and the name is checked against a
/// per-provider table. The table is intended to mirror the per-provider
/// rules in `openai.rs` / `gemini.rs` / `anthropic.rs` / `glm.rs` /
/// `ollama.rs`, so adopters get consistent `supports()` answers
/// regardless of routing.
///
/// Returns `false` — the conservative default — for a bare model name
/// (no `/`), for an unknown provider, and for any model not explicitly
/// listed as multimodal.
fn slug_supports_vision(model: &str) -> bool {
    // Pre-Claude-3 Anthropic families are text-only and must not be swept
    // up by the generic `claude-` prefix match below.
    const TEXT_ONLY_CLAUDE_PREFIXES: [&str; 3] = ["claude-1", "claude-2", "claude-instant"];

    let lc = model.to_lowercase();
    let (provider, name) = match lc.split_once('/') {
        Some(parts) => parts,
        // Bare model name (no slug) — treat conservatively, default false.
        None => return false,
    };
    match provider {
        // OpenAI: gpt-4o family supports vision; o1 / o3 are text-only.
        "openai" => name.starts_with("gpt-4o"),
        // Anthropic: every Claude 3+ family supports vision (claude-3,
        // claude-3.5, claude-4, claude-haiku-4-5, claude-sonnet-4-5);
        // the legacy claude-1 / claude-2 / claude-instant lines do not.
        "anthropic" => {
            name.starts_with("claude-")
                && !TEXT_ONLY_CLAUDE_PREFIXES
                    .iter()
                    .any(|legacy| name.starts_with(legacy))
        }
        // Google Gemini: 1.5 / 2.0 / 2.5 families are multimodal.
        "google" => name.contains("1.5") || name.contains("2.0") || name.contains("2.5"),
        // Meta / Qwen / Microsoft: only specific multimodal SKUs.
        // Match on documented family names.
        "meta" | "meta-llama" => name.contains("llama-3.2-vision") || name.contains("llava"),
        "qwen" => name.contains("vl"),
        "microsoft" => name.contains("phi-3.5-vision") || name.contains("phi-4-vision"),
        // GLM: 4v family is multimodal.
        "zhipu" | "glm" | "z-ai" => name.starts_with("glm-4v"),
        // Mistral: pixtral family supports vision.
        "mistralai" | "mistral" => name.contains("pixtral"),
        // Conservative default — anything not listed above is treated as
        // text-only.
        _ => false,
    }
}
201
202// ────────────────────────────────────────────────────────────────────
203//  Module-level factories
204// ────────────────────────────────────────────────────────────────────
205
/// Construct an OpenRouter backend using the `OPENROUTER_API_KEY` env var.
///
/// Module-level convenience wrapper; forwards to
/// [`OpenRouterBackend::from_env`].
pub fn from_env() -> OpenRouterBackend {
    OpenRouterBackend::from_env()
}
210
/// Construct an OpenRouter backend with an explicit API key (or `None`).
///
/// Module-level convenience wrapper; forwards to
/// [`OpenRouterBackend::with_api_key`].
pub fn with_api_key(api_key: Option<String>) -> OpenRouterBackend {
    OpenRouterBackend::with_api_key(api_key)
}
215
/// Boxed, pinned, `Send` stream whose items are
/// `Result<ChatChunk, BackendError>`.
///
/// NOTE(review): nothing in the visible part of this file references
/// this alias, hence the `dead_code` allowance. It appears to exist
/// mainly to keep the `Pin` / `Stream` imports in use — confirm before
/// removing either the alias or those imports.
#[allow(dead_code)]
type OpenRouterChatStream =
    Pin<Box<dyn Stream<Item = Result<crate::backends::ChatChunk, BackendError>> + Send>>;
219
220// ────────────────────────────────────────────────────────────────────
221//  Tests
222// ────────────────────────────────────────────────────────────────────
223
#[cfg(test)]
mod tests {
    use super::*;
    use crate::backends::openai_compat::build_request_body;
    use crate::backends::Message;

    /// Default slug handed to `build_request_body` and used wherever a
    /// test just needs "some ordinary chat model".
    const DEFAULT_SLUG: &str = "openai/gpt-4o-mini";

    /// Backend built with a dummy API key.
    fn keyed_backend() -> OpenRouterBackend {
        OpenRouterBackend::with_api_key(Some("k".into()))
    }

    /// Request carrying a single user message ("hi") addressed to `model`.
    fn request_for(model: &str) -> ChatRequest {
        ChatRequest {
            model: model.into(),
            messages: vec![Message::user("hi")],
            ..Default::default()
        }
    }

    /// Assert that `supports(Vision, slug)` equals `expected` for every slug.
    fn assert_vision(expected: bool, slugs: &[&str]) {
        let backend = keyed_backend();
        for slug in slugs {
            assert_eq!(
                backend.supports(Capability::Vision, slug),
                expected,
                "Vision support for {slug}"
            );
        }
    }

    /// Assert that `supports(LockedParams, slug)` equals `expected` for
    /// every slug.
    fn assert_locked_params(expected: bool, slugs: &[&str]) {
        let backend = keyed_backend();
        for slug in slugs {
            assert_eq!(
                backend.supports(Capability::LockedParams, slug),
                expected,
                "LockedParams support for {slug}"
            );
        }
    }

    // ── Construction ────────────────────────────────────────────────

    #[test]
    fn from_env_constructs_openrouter_backend() {
        let backend = OpenRouterBackend::from_env();
        assert_eq!(backend.name(), "openrouter");
        assert_eq!(backend.default_model(), "openai/gpt-4o-mini");
    }

    #[test]
    fn module_factory_from_env_works() {
        assert_eq!(from_env().name(), "openrouter");
    }

    #[test]
    fn module_factory_with_api_key_explicit() {
        let backend = with_api_key(Some("sk-or-v1-test".into()));
        assert_eq!(backend.name(), "openrouter");
    }

    #[test]
    fn with_default_model_overrides() {
        let pinned = "anthropic/claude-haiku-4-5";
        let backend = keyed_backend().with_default_model(pinned);
        assert_eq!(backend.default_model(), "anthropic/claude-haiku-4-5");
    }

    #[test]
    fn with_base_url_overrides_for_test_fixtures() {
        // Construction-only check: the builder must accept a fixture URL.
        let _ = keyed_backend().with_base_url("http://localhost:9999");
    }

    #[test]
    fn inner_accessor_returns_compat_backend() {
        let backend = keyed_backend();
        assert_eq!(backend.inner().name(), "openrouter");
    }

    #[test]
    fn default_constructs_via_from_env() {
        assert_eq!(OpenRouterBackend::default().name(), "openrouter");
    }

    // ── Slug helpers ────────────────────────────────────────────────

    #[test]
    fn strip_provider_prefix_returns_model_only() {
        let cases = [
            ("openai/gpt-4o-mini", "gpt-4o-mini"),
            ("anthropic/claude-sonnet-4-5", "claude-sonnet-4-5"),
            ("moonshot/kimi-k2.6", "kimi-k2.6"),
        ];
        for (slug, bare) in cases {
            assert_eq!(strip_provider_prefix(slug), bare);
        }
    }

    #[test]
    fn strip_provider_prefix_idempotent_for_bare_names() {
        for bare in ["gpt-4o-mini", ""] {
            assert_eq!(strip_provider_prefix(bare), bare);
        }
    }

    // ── Capability: Vision dispatch by underlying model ─────────────

    #[test]
    fn supports_vision_for_openai_gpt_4o_slug() {
        assert_vision(true, &["openai/gpt-4o-mini", "openai/gpt-4o-2024-08-06"]);
    }

    #[test]
    fn does_not_support_vision_for_openai_o1_o3_slugs() {
        assert_vision(false, &["openai/o1-mini", "openai/o3", "openai/o3-mini"]);
    }

    #[test]
    fn supports_vision_for_anthropic_claude_slugs() {
        assert_vision(
            true,
            &[
                "anthropic/claude-sonnet-4-5",
                "anthropic/claude-haiku-4-5",
                "anthropic/claude-3-5-sonnet",
            ],
        );
    }

    #[test]
    fn supports_vision_for_google_gemini_15_20_25_slugs() {
        assert_vision(
            true,
            &[
                "google/gemini-1.5-pro",
                "google/gemini-2.0-flash",
                "google/gemini-2.5-pro",
                "google/gemini-2.5-flash",
            ],
        );
    }

    #[test]
    fn does_not_support_vision_for_legacy_gemini_pro() {
        assert_vision(false, &["google/gemini-pro", "google/gemini-1.0-pro"]);
    }

    #[test]
    fn supports_vision_for_meta_llama_vision_and_llava() {
        assert_vision(
            true,
            &["meta-llama/llama-3.2-vision-11b", "meta-llama/llava-llama-3"],
        );
    }

    #[test]
    fn does_not_support_vision_for_text_only_meta_llama() {
        assert_vision(
            false,
            &[
                "meta-llama/llama-3.1-70b-instruct",
                "meta-llama/llama-3.3-70b-instruct",
            ],
        );
    }

    #[test]
    fn supports_vision_for_qwen_vl_slugs() {
        assert_vision(
            true,
            &["qwen/qwen2-vl-7b-instruct", "qwen/qwen2.5-vl-72b-instruct"],
        );
    }

    #[test]
    fn supports_vision_for_mistral_pixtral() {
        assert_vision(true, &["mistralai/pixtral-12b-2409"]);
    }

    #[test]
    fn does_not_support_vision_for_text_only_mistral() {
        assert_vision(false, &["mistralai/mistral-large"]);
    }

    #[test]
    fn does_not_support_vision_for_bare_model_name() {
        // A bare model name (no `provider/` part) falls into the
        // conservative default. OpenRouter expects the slug form, so
        // this input is unusual — but the dispatch must answer `false`
        // rather than crash.
        assert_vision(false, &["gpt-4o-mini"]);
    }

    #[test]
    fn does_not_support_vision_for_unknown_provider() {
        assert_vision(false, &["newprovider/exotic-model-7b"]);
    }

    // ── LockedParams via slug normalisation in shared base ──────────

    #[test]
    fn supports_lockedparams_for_openai_o1_o3_slugs() {
        assert_locked_params(true, &["openai/o1-mini", "openai/o3", "openai/o3-mini"]);
    }

    #[test]
    fn supports_lockedparams_for_moonshot_kimi_k2_slug() {
        assert_locked_params(true, &["moonshot/kimi-k2.6", "moonshot/kimi-k2.8"]);
    }

    #[test]
    fn does_not_support_lockedparams_for_chat_slugs() {
        assert_locked_params(
            false,
            &[
                "openai/gpt-4o-mini",
                "anthropic/claude-sonnet-4-5",
                "google/gemini-2.5-pro",
            ],
        );
    }

    // ── Body builder strips locked params on slug form ──────────────

    #[test]
    fn body_strips_locked_params_for_slug_form_o1() {
        // The Kivi v1.16.2 incident, replayed through OpenRouter: the
        // adopter sends `openai/o1-mini` together with a temperature.
        // The shared base has to drop it before the request goes to
        // OpenRouter (which forwards to OpenAI, which would otherwise
        // answer HTTP 400).
        let mut request = request_for("openai/o1-mini");
        request.temperature = Some(0.7);
        let body = build_request_body(&request, DEFAULT_SLUG, false);
        assert!(body.get("temperature").is_none());
    }

    #[test]
    fn body_strips_locked_params_for_slug_form_kimi_k2() {
        let mut request = request_for("moonshot/kimi-k2.6");
        request.temperature = Some(0.5);
        request.top_p = Some(0.9);
        let body = build_request_body(&request, DEFAULT_SLUG, false);
        for locked in ["temperature", "top_p"] {
            assert!(body.get(locked).is_none());
        }
    }

    #[test]
    fn body_keeps_sampling_params_for_unlocked_slug() {
        let mut request = request_for("openai/gpt-4o-mini");
        request.temperature = Some(0.5);
        let body = build_request_body(&request, DEFAULT_SLUG, false);
        assert_eq!(body["temperature"], 0.5);
    }

    // ── count_tokens slug-aware ─────────────────────────────────────

    #[test]
    fn count_tokens_uses_o200k_for_openai_gpt_4o_slug() {
        // `openai/gpt-4o-mini` → strip → `gpt-4o-mini` → o200k_base.
        // The exact tokenizer reports between 1 and 5 tokens for
        // "hello world".
        let tokens = keyed_backend().count_tokens("openai/gpt-4o-mini", "hello world");
        assert!((1..=5).contains(&tokens));
    }

    #[test]
    fn count_tokens_uses_cl100k_for_moonshot_slug() {
        // `moonshot/kimi-k2.6` → strip → `kimi-k2.6` → cl100k_base.
        let tokens = keyed_backend().count_tokens("moonshot/kimi-k2.6", "hello world");
        assert!(tokens > 0);
    }

    #[test]
    fn count_tokens_uses_estimate_for_anthropic_slug() {
        // Claude has no offline tokenizer in tiktoken-rs → estimate:
        // 8 chars → 2 tokens.
        let tokens = keyed_backend().count_tokens("anthropic/claude-sonnet-4-5", "ABCDEFGH");
        assert_eq!(tokens, 2);
    }

    // ── Streaming surface ───────────────────────────────────────────

    #[tokio::test]
    async fn stream_delegates_to_base_real_sse_implementation() {
        // §Fase 33.d — OpenRouter delegates to the OpenAI-compat base,
        // which ships a real SSE streamer; pointing it at an
        // unreachable port exercises the transport-error path.
        let backend = keyed_backend().with_base_url("http://127.0.0.1:1");
        let outcome = backend.stream(ChatRequest::default()).await;
        match &outcome {
            Err(BackendError::Generic { message, .. }) => assert!(
                message.contains("streaming transport failure") || message.contains("transport"),
                "unexpected message: {message}"
            ),
            Err(other) => panic!("expected Generic, got {other:?}"),
            Ok(_) => panic!("expected error, got Ok"),
        }
    }

    // ── complete() — early failure paths ────────────────────────────

    #[tokio::test]
    async fn complete_without_api_key_returns_auth_error() {
        let backend = OpenRouterBackend::with_api_key(None).with_base_url("http://127.0.0.1:0");
        let request = ChatRequest {
            messages: vec![Message::user("hi")],
            ..Default::default()
        };
        match backend.complete(request).await.unwrap_err() {
            BackendError::Auth { api_key_env, .. } => {
                assert_eq!(api_key_env.as_deref(), Some(API_KEY_ENV));
            }
            other => panic!("expected Auth, got {other:?}"),
        }
    }

    // ── Standard caps via base ──────────────────────────────────────

    #[test]
    fn supports_streaming_tooluse_structured_via_base() {
        let backend = keyed_backend();
        assert!(backend.supports(Capability::Streaming, DEFAULT_SLUG));
        assert!(backend.supports(Capability::ToolUse, DEFAULT_SLUG));
        assert!(backend.supports(Capability::StructuredOutput, DEFAULT_SLUG));
    }

    #[test]
    fn does_not_support_anthropic_or_gemini_only_caps() {
        let backend = keyed_backend();
        assert!(!backend.supports(Capability::PromptCaching, DEFAULT_SLUG));
        assert!(!backend.supports(Capability::SafetySettings, DEFAULT_SLUG));
    }
}