Skip to main content

mermaid_cli/models/
providers.rs

1//! Provider profiles for the OpenAI-compatible adapter.
2//!
3//! Every OpenAI-compatible provider (Groq, Together, Fireworks, OpenRouter,
4//! vLLM, DeepInfra, Cerebras, SambaNova, LMStudio, llama.cpp, …) speaks
5//! roughly the same `/v1/chat/completions` shape. The differences fit into
6//! two small dimensions:
7//!
8//!   1. How they want **reasoning depth** in the request. The de-facto
9//!      standard is a string `reasoning_effort: "low"|"medium"|"high"`
10//!      field; OpenRouter wraps it in a `reasoning: {effort: …}` object
11//!      and adds a few extras; some providers ignore reasoning entirely.
12//!   2. Where they put **reasoning content** in the streaming response.
13//!      Some emit `delta.reasoning_content`, some `delta.reasoning`, and
14//!      a couple stuff `<think>...</think>` tags inline in `delta.content`.
15//!
//! `ProviderProfile` captures both dimensions plus the base URL, the auth
//! env var, and any attribution headers (OpenRouter wants `HTTP-Referer` +
//! `X-OpenRouter-Title`; the legacy `X-Title` name is still accepted).
//! A `pub const REGISTRY` lists the built-in providers; users can override
//! the URL / auth env / headers per provider via `[providers.<name>]` in
//! `config.toml`, and can add fully custom providers by reusing a known
//! profile.
22
23use serde::Deserialize;
24use serde_json::{Value, json};
25
26use super::reasoning::{ReasoningChunk, ReasoningLevel};
27
28/// Static description of one OpenAI-compatible provider.
29#[derive(Debug, Clone)]
30pub struct ProviderProfile {
31    /// Provider identifier as it appears in model IDs (e.g. `"groq"` for
32    /// `groq/qwen-qwq-32b`). Lowercased; matched case-insensitively.
33    pub name: &'static str,
34    /// Default base URL for `/chat/completions` and friends. The trailing
35    /// `/v1` (or equivalent) is included so adapter code just appends
36    /// `/chat/completions` etc.
37    pub base_url: &'static str,
38    /// Default env var holding the API key. User config can override.
39    pub api_key_env: &'static str,
40    /// Headers always sent in addition to `Authorization: Bearer ...`.
41    /// OpenRouter requires `HTTP-Referer` + `X-Title` for its analytics
42    /// dashboard; everyone else uses an empty list.
43    pub extra_headers: &'static [(&'static str, &'static str)],
44    /// How to render `ReasoningLevel` into the request body.
45    pub reasoning_strategy: ReasoningStrategy,
46    /// Where reasoning content lives in the streaming response.
47    pub reasoning_extraction: ReasoningExtraction,
48}
49
/// Request-side encoding of `ReasoningLevel` for a given provider.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReasoningStrategy {
    /// The provider exposes no reasoning controls (Together, DeepInfra
    /// pass-through). Nothing extra is added to the request.
    None,
    /// Flat string field `reasoning_effort: "..."` (OpenAI Chat
    /// Completions, Groq for gpt-oss, Cerebras for gpt-oss-120b,
    /// Fireworks for Qwen 3, etc.). Rendered values are `none`,
    /// `minimal`, `low`, `medium`, `high` and `xhigh`.
    Effort,
    /// OpenRouter's normalized nested object `reasoning: {effort: "..."}`.
    /// Rendered efforts are `low`, `medium`, `high` and `max`; a level of
    /// `None` becomes `{exclude: true}`, which suppresses reasoning.
    OpenRouterShape,
}
65
66impl ReasoningStrategy {
67    /// Render a `ReasoningLevel` to the JSON fragment that should be
68    /// merged into the `/chat/completions` request body. Returns `None`
69    /// if there's nothing to add (strategy is `None`, or the level is
70    /// `None` for a provider that signals via field omission).
71    pub fn render(&self, level: ReasoningLevel) -> Option<Value> {
72        match self {
73            ReasoningStrategy::None => None,
74            ReasoningStrategy::Effort => match level {
75                // `none` is the explicit off-tier on GPT-5.1+. Providers
76                // that don't understand it either silently ignore or 400 —
77                // which is a clearer failure than omitting the field when
78                // the user explicitly asked for it.
79                ReasoningLevel::None => Some(json!({"reasoning_effort": "none"})),
80                ReasoningLevel::Minimal => Some(json!({"reasoning_effort": "minimal"})),
81                ReasoningLevel::Low => Some(json!({"reasoning_effort": "low"})),
82                ReasoningLevel::Medium => Some(json!({"reasoning_effort": "medium"})),
83                ReasoningLevel::High => Some(json!({"reasoning_effort": "high"})),
84                // XHigh renders verbatim to "xhigh" — the dedicated OpenAI
85                // GPT-5.2+ tier. Non-OpenAI Effort providers (Groq,
86                // Cerebras, Fireworks) will 400 on "xhigh"; that's
87                // preferable to silently downgrading the user's explicit
88                // choice.
89                ReasoningLevel::XHigh => Some(json!({"reasoning_effort": "xhigh"})),
90                // Max collapses to "high" on Effort-shape providers.
91                // OpenAI's Effort enum doesn't have a "max" value (goes
92                // `...high | xhigh` and stops); users wanting OpenAI's
93                // top tier should pick `XHigh` explicitly. Providers
94                // with a genuine "max" tier (Anthropic, OpenRouter) use
95                // their own strategy, not this one.
96                ReasoningLevel::Max => Some(json!({"reasoning_effort": "high"})),
97            },
98            ReasoningStrategy::OpenRouterShape => match level {
99                ReasoningLevel::None => Some(json!({"reasoning": {"exclude": true}})),
100                ReasoningLevel::Minimal => Some(json!({"reasoning": {"effort": "low"}})),
101                ReasoningLevel::Low => Some(json!({"reasoning": {"effort": "low"}})),
102                ReasoningLevel::Medium => Some(json!({"reasoning": {"effort": "medium"}})),
103                ReasoningLevel::High => Some(json!({"reasoning": {"effort": "high"}})),
104                // OpenRouter has no `xhigh` tier. Since XHigh sits between
105                // High and Max, snap DOWN to `high` — the user picked
106                // something above high but below max; giving them max would
107                // over-deliver.
108                ReasoningLevel::XHigh => Some(json!({"reasoning": {"effort": "high"}})),
109                ReasoningLevel::Max => Some(json!({"reasoning": {"effort": "max"}})),
110            },
111        }
112    }
113}
114
/// Response-side location of reasoning content within a streamed delta.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReasoningExtraction {
    /// The provider streams no reasoning content at all (OpenAI Chat
    /// Completions for o-series, where it stays encrypted server-side).
    None,
    /// Reasoning is delivered in `delta.<field>` of each streaming chunk.
    /// Typical field names are `"reasoning_content"` (vLLM, DeepInfra,
    /// DeepSeek) and `"reasoning"` (Groq parsed mode, OpenRouter).
    DeltaContentField(&'static str),
    /// Reasoning is embedded as `<think>...</think>` inside
    /// `delta.content` (Together-R1, Groq raw mode, Fireworks `/think`
    /// suffix). The adapter strips the tags and reroutes the bytes between
    /// them to the reasoning channel using a streaming state machine.
    InlineThinkTags,
}
131
132impl ReasoningExtraction {
133    /// Pull reasoning content out of a streaming delta JSON. Returns
134    /// `None` if this strategy doesn't extract from the JSON body
135    /// (`None` and `InlineThinkTags`) or if the delta has no reasoning.
136    /// `InlineThinkTags` is handled separately at the byte-stream level
137    /// in the adapter; this method returns `None` for it.
138    pub fn parse_delta(&self, delta: &Value) -> Option<ReasoningChunk> {
139        match self {
140            ReasoningExtraction::None | ReasoningExtraction::InlineThinkTags => None,
141            ReasoningExtraction::DeltaContentField(field) => {
142                let text = delta.get(field).and_then(|v| v.as_str())?;
143                if text.is_empty() {
144                    None
145                } else {
146                    Some(ReasoningChunk {
147                        text: text.to_string(),
148                        signature: None,
149                    })
150                }
151            },
152        }
153    }
154}
155
156/// User-friendly string form for `compat = "..."` in config.toml when a
157/// fully custom provider needs to declare which profile shape to follow.
158#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
159#[serde(rename_all = "kebab-case")]
160pub enum CompatStyle {
161    /// Standard OpenAI Chat Completions shape, no reasoning extras
162    /// (matches Together, DeepInfra, Cerebras for non-gpt-oss models).
163    Openai,
164    /// Same shape but with `reasoning_effort` on requests.
165    OpenaiEffort,
166    /// OpenRouter's normalized reasoning object.
167    Openrouter,
168}
169
170impl CompatStyle {
171    pub fn reasoning_strategy(self) -> ReasoningStrategy {
172        match self {
173            CompatStyle::Openai => ReasoningStrategy::None,
174            CompatStyle::OpenaiEffort => ReasoningStrategy::Effort,
175            CompatStyle::Openrouter => ReasoningStrategy::OpenRouterShape,
176        }
177    }
178}
179
180/// Built-in provider registry. Lookups are case-insensitive on `name`.
181/// Add a provider here when its quirks fit the existing strategies; add
182/// a new `ReasoningStrategy` variant when a provider needs something
183/// the existing ones can't express.
184pub const REGISTRY: &[ProviderProfile] = &[
185    ProviderProfile {
186        name: "openai",
187        base_url: "https://api.openai.com/v1",
188        api_key_env: "OPENAI_API_KEY",
189        extra_headers: &[],
190        reasoning_strategy: ReasoningStrategy::Effort,
191        // Chat Completions doesn't stream reasoning content for o-series
192        // (encrypted server-side); only the Responses API does. Step 2
193        // targets Chat Completions, so None.
194        reasoning_extraction: ReasoningExtraction::None,
195    },
196    ProviderProfile {
197        name: "groq",
198        base_url: "https://api.groq.com/openai/v1",
199        api_key_env: "GROQ_API_KEY",
200        extra_headers: &[],
201        reasoning_strategy: ReasoningStrategy::Effort,
202        // Default `reasoning_format=parsed` routes reasoning to its own
203        // `delta.reasoning` field; we read it from there.
204        reasoning_extraction: ReasoningExtraction::DeltaContentField("reasoning"),
205    },
206    ProviderProfile {
207        name: "openrouter",
208        base_url: "https://openrouter.ai/api/v1",
209        api_key_env: "OPENROUTER_API_KEY",
210        extra_headers: &[
211            ("HTTP-Referer", "https://github.com/noahsabaj/mermaid-cli"),
212            // Canonical attribution header as of April 2026. OpenRouter
213            // still accepts `X-Title` for backward compat, but new code
214            // should emit `X-OpenRouter-Title`.
215            ("X-OpenRouter-Title", "Mermaid"),
216        ],
217        reasoning_strategy: ReasoningStrategy::OpenRouterShape,
218        reasoning_extraction: ReasoningExtraction::DeltaContentField("reasoning"),
219    },
220    ProviderProfile {
221        name: "cerebras",
222        base_url: "https://api.cerebras.ai/v1",
223        api_key_env: "CEREBRAS_API_KEY",
224        extra_headers: &[],
225        // Effort-style request param. `gpt-oss-120b` and `zai-glm-4.7`
226        // honor it (the latter accepts `none` to disable); other models
227        // silently ignore — wire shape is the same.
228        reasoning_strategy: ReasoningStrategy::Effort,
229        reasoning_extraction: ReasoningExtraction::None,
230    },
231    ProviderProfile {
232        name: "deepinfra",
233        base_url: "https://api.deepinfra.com/v1/openai",
234        api_key_env: "DEEPINFRA_API_KEY",
235        extra_headers: &[],
236        // Pass-through; reasoning shape per upstream model. Most R1-style
237        // models on DeepInfra emit `delta.reasoning_content`.
238        reasoning_strategy: ReasoningStrategy::None,
239        reasoning_extraction: ReasoningExtraction::DeltaContentField("reasoning_content"),
240    },
241    ProviderProfile {
242        name: "together",
243        base_url: "https://api.together.xyz/v1",
244        api_key_env: "TOGETHER_API_KEY",
245        extra_headers: &[],
246        reasoning_strategy: ReasoningStrategy::None,
247        // DeepSeek-R1 and friends on Together emit `<think>...</think>`
248        // inside `delta.content`. Adapter strips and reroutes.
249        reasoning_extraction: ReasoningExtraction::InlineThinkTags,
250    },
251];
252
253/// Look up a built-in provider by name. Case-insensitive.
254pub fn lookup_provider(name: &str) -> Option<&'static ProviderProfile> {
255    let lower = name.to_lowercase();
256    REGISTRY.iter().find(|p| p.name == lower)
257}
258
/// Unit tests for the registry, request rendering, response extraction,
/// and the config-facing compat style.
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `ReasoningLevel` variant handled by `render`, so the
    /// per-strategy tests can cover the full set.
    const ALL_LEVELS: [ReasoningLevel; 7] = [
        ReasoningLevel::None,
        ReasoningLevel::Minimal,
        ReasoningLevel::Low,
        ReasoningLevel::Medium,
        ReasoningLevel::High,
        ReasoningLevel::XHigh,
        ReasoningLevel::Max,
    ];

    // --- Registry lookup ---

    #[test]
    fn lookup_known_provider() {
        let p = lookup_provider("groq").expect("groq is in the registry");
        assert_eq!(p.name, "groq");
        assert!(p.base_url.starts_with("https://api.groq.com"));
        assert_eq!(p.api_key_env, "GROQ_API_KEY");
    }

    #[test]
    fn lookup_is_case_insensitive() {
        assert!(lookup_provider("OpenAI").is_some());
        assert!(lookup_provider("OPENROUTER").is_some());
    }

    #[test]
    fn lookup_unknown_provider() {
        assert!(lookup_provider("does-not-exist").is_none());
    }

    #[test]
    fn registry_has_six_providers() {
        assert_eq!(REGISTRY.len(), 6);
    }

    #[test]
    fn registry_names_are_lowercase_ascii() {
        // Case-insensitive lookup relies on names being stored as
        // lowercase ASCII; guard that invariant for future entries.
        for p in REGISTRY {
            assert!(p.name.is_ascii(), "{} is not ASCII", p.name);
            assert_eq!(p.name, p.name.to_ascii_lowercase());
        }
    }

    #[test]
    fn openrouter_has_analytics_headers() {
        let p = lookup_provider("openrouter").unwrap();
        let names: Vec<&str> = p.extra_headers.iter().map(|(k, _)| *k).collect();
        assert!(names.contains(&"HTTP-Referer"));
        // Canonical header name as of 2026-04. `X-Title` is still
        // accepted for backward compat but new code emits the rebranded
        // version.
        assert!(names.contains(&"X-OpenRouter-Title"));
    }

    // --- ReasoningStrategy::render ---

    #[test]
    fn effort_renders_string_per_level() {
        let s = ReasoningStrategy::Effort;
        // `None` is the explicit off-tier per GPT-5.1+; we emit the
        // string rather than omitting the field so the user's choice
        // reaches the provider.
        assert_eq!(
            s.render(ReasoningLevel::None),
            Some(json!({"reasoning_effort": "none"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Minimal),
            Some(json!({"reasoning_effort": "minimal"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Low),
            Some(json!({"reasoning_effort": "low"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Medium),
            Some(json!({"reasoning_effort": "medium"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::High),
            Some(json!({"reasoning_effort": "high"})),
        );
        // XHigh — OpenAI GPT-5.2+ tier. Sits between High and Max in
        // our enum but on the wire it's OpenAI's actual top string.
        // Providers that don't expose xhigh will 400.
        assert_eq!(
            s.render(ReasoningLevel::XHigh),
            Some(json!({"reasoning_effort": "xhigh"})),
        );
        // Max collapses to high — OpenAI's Effort enum has no "max".
        // Users wanting OpenAI's actual top tier should pick XHigh.
        assert_eq!(
            s.render(ReasoningLevel::Max),
            Some(json!({"reasoning_effort": "high"})),
        );
    }

    #[test]
    fn openrouter_shape_renders_nested_object() {
        let s = ReasoningStrategy::OpenRouterShape;
        // None means "exclude" on OpenRouter — explicitly suppress
        // reasoning rather than fall through to the model default.
        assert_eq!(
            s.render(ReasoningLevel::None),
            Some(json!({"reasoning": {"exclude": true}})),
        );
        // Minimal has no dedicated tier in this shape and shares `low`.
        assert_eq!(
            s.render(ReasoningLevel::Minimal),
            Some(json!({"reasoning": {"effort": "low"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Low),
            Some(json!({"reasoning": {"effort": "low"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Medium),
            Some(json!({"reasoning": {"effort": "medium"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::High),
            Some(json!({"reasoning": {"effort": "high"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Max),
            Some(json!({"reasoning": {"effort": "max"}})),
        );
        // OpenRouter has no xhigh tier; XHigh (between High and Max)
        // snaps DOWN to `high` — don't over-deliver by bumping to max.
        assert_eq!(
            s.render(ReasoningLevel::XHigh),
            Some(json!({"reasoning": {"effort": "high"}})),
        );
    }

    #[test]
    fn none_strategy_renders_nothing() {
        let s = ReasoningStrategy::None;
        // Cover every level, including Minimal and XHigh.
        for level in ALL_LEVELS {
            assert_eq!(s.render(level), None);
        }
    }

    #[test]
    fn rendering_strategies_always_emit_a_fragment() {
        // Effort and OpenRouterShape never signal by omission: every
        // level, including `None`, produces something to merge.
        for s in [ReasoningStrategy::Effort, ReasoningStrategy::OpenRouterShape] {
            for level in ALL_LEVELS {
                assert!(s.render(level).is_some());
            }
        }
    }

    // --- ReasoningExtraction::parse_delta ---

    #[test]
    fn delta_field_extraction_finds_named_field() {
        let e = ReasoningExtraction::DeltaContentField("reasoning_content");
        let delta = json!({"reasoning_content": "weighing options", "content": ""});
        let chunk = e.parse_delta(&delta).expect("should extract");
        assert_eq!(chunk.text, "weighing options");
        assert!(chunk.signature.is_none());
    }

    #[test]
    fn delta_field_extraction_returns_none_when_absent() {
        let e = ReasoningExtraction::DeltaContentField("reasoning_content");
        let delta = json!({"content": "regular text"});
        assert!(e.parse_delta(&delta).is_none());
    }

    #[test]
    fn delta_field_extraction_returns_none_for_empty_string() {
        let e = ReasoningExtraction::DeltaContentField("reasoning");
        let delta = json!({"reasoning": ""});
        assert!(e.parse_delta(&delta).is_none());
    }

    #[test]
    fn delta_field_extraction_returns_none_for_non_string() {
        // A `null` (or any non-string) value is treated like an absent
        // field rather than being stringified.
        let e = ReasoningExtraction::DeltaContentField("reasoning");
        let delta = json!({"reasoning": null});
        assert!(e.parse_delta(&delta).is_none());
    }

    #[test]
    fn none_extraction_always_returns_none() {
        let e = ReasoningExtraction::None;
        assert!(e.parse_delta(&json!({"reasoning_content": "x"})).is_none());
    }

    #[test]
    fn inline_think_tags_does_not_parse_via_json() {
        // Inline tags are handled at the byte-stream level in the
        // adapter (Wave 6); this method always returns None for them.
        let e = ReasoningExtraction::InlineThinkTags;
        assert!(
            e.parse_delta(&json!({"content": "<think>x</think>"}))
                .is_none()
        );
    }

    // --- CompatStyle ---

    #[test]
    fn compat_style_maps_to_strategy() {
        assert_eq!(
            CompatStyle::Openai.reasoning_strategy(),
            ReasoningStrategy::None
        );
        assert_eq!(
            CompatStyle::OpenaiEffort.reasoning_strategy(),
            ReasoningStrategy::Effort
        );
        assert_eq!(
            CompatStyle::Openrouter.reasoning_strategy(),
            ReasoningStrategy::OpenRouterShape
        );
    }
}