mermaid_cli/models/providers.rs
1//! Provider profiles for the OpenAI-compatible adapter.
2//!
3//! Every OpenAI-compatible provider (Groq, Together, Fireworks, OpenRouter,
4//! vLLM, DeepInfra, Cerebras, SambaNova, LMStudio, llama.cpp, …) speaks
5//! roughly the same `/v1/chat/completions` shape. The differences fit into
6//! two small dimensions:
7//!
8//! 1. How they want **reasoning depth** in the request. The de-facto
9//! standard is a string `reasoning_effort: "low"|"medium"|"high"`
10//! field; OpenRouter wraps it in a `reasoning: {effort: …}` object
11//! and adds a few extras; some providers ignore reasoning entirely.
12//! 2. Where they put **reasoning content** in the streaming response.
13//! Some emit `delta.reasoning_content`, some `delta.reasoning`, and
14//! a couple stuff `<think>...</think>` tags inline in `delta.content`.
15//!
//! `ProviderProfile` captures both dimensions plus base URL, auth env
//! var, and any attribution headers (OpenRouter wants `HTTP-Referer` +
//! `X-OpenRouter-Title`; the older `X-Title` is still accepted). The
//! `pub const REGISTRY` lists the known providers; users can override
//! the URL / auth env / headers per-provider via `[providers.<name>]`
//! in `config.toml` and add fully custom providers by reusing a known
//! profile.
22
23use serde::Deserialize;
24use serde_json::{Value, json};
25
26use super::reasoning::{ReasoningChunk, ReasoningLevel};
27
28/// Static description of one OpenAI-compatible provider.
29#[derive(Debug, Clone)]
30pub struct ProviderProfile {
31 /// Provider identifier as it appears in model IDs (e.g. `"groq"` for
32 /// `groq/qwen-qwq-32b`). Lowercased; matched case-insensitively.
33 pub name: &'static str,
34 /// Default base URL for `/chat/completions` and friends. The trailing
35 /// `/v1` (or equivalent) is included so adapter code just appends
36 /// `/chat/completions` etc.
37 pub base_url: &'static str,
38 /// Default env var holding the API key. User config can override.
39 pub api_key_env: &'static str,
40 /// Headers always sent in addition to `Authorization: Bearer ...`.
41 /// OpenRouter requires `HTTP-Referer` + `X-Title` for its analytics
42 /// dashboard; everyone else uses an empty list.
43 pub extra_headers: &'static [(&'static str, &'static str)],
44 /// How to render `ReasoningLevel` into the request body.
45 pub reasoning_strategy: ReasoningStrategy,
46 /// Where reasoning content lives in the streaming response.
47 pub reasoning_extraction: ReasoningExtraction,
48}
49
/// Wire encoding used to send a `ReasoningLevel` to a provider.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReasoningStrategy {
    /// The provider has no reasoning controls (Together, DeepInfra
    /// pass-through), so nothing is added to the request.
    None,
    /// Flat `reasoning_effort: "<tier>"` string field (OpenAI Chat
    /// Completions, Groq for gpt-oss, Cerebras for gpt-oss-120b,
    /// Fireworks for Qwen 3, etc.). Tiers are sent as `none`, `minimal`,
    /// `low`, `medium`, `high` or `xhigh`; `Max` is sent as `high`.
    Effort,
    /// OpenRouter's nested `reasoning: {effort: "..."}` object, with
    /// efforts `low`, `medium`, `high` and `max`. A level of `None` is
    /// sent as `{exclude: true}` to suppress reasoning.
    OpenRouterShape,
}
65
66impl ReasoningStrategy {
67 /// Render a `ReasoningLevel` to the JSON fragment that should be
68 /// merged into the `/chat/completions` request body. Returns `None`
69 /// if there's nothing to add (strategy is `None`, or the level is
70 /// `None` for a provider that signals via field omission).
71 pub fn render(&self, level: ReasoningLevel) -> Option<Value> {
72 match self {
73 ReasoningStrategy::None => None,
74 ReasoningStrategy::Effort => match level {
75 // `none` is the explicit off-tier on GPT-5.1+. Providers
76 // that don't understand it either silently ignore or 400 —
77 // which is a clearer failure than omitting the field when
78 // the user explicitly asked for it.
79 ReasoningLevel::None => Some(json!({"reasoning_effort": "none"})),
80 ReasoningLevel::Minimal => Some(json!({"reasoning_effort": "minimal"})),
81 ReasoningLevel::Low => Some(json!({"reasoning_effort": "low"})),
82 ReasoningLevel::Medium => Some(json!({"reasoning_effort": "medium"})),
83 ReasoningLevel::High => Some(json!({"reasoning_effort": "high"})),
84 // XHigh renders verbatim to "xhigh" — the dedicated OpenAI
85 // GPT-5.2+ tier. Non-OpenAI Effort providers (Groq,
86 // Cerebras, Fireworks) will 400 on "xhigh"; that's
87 // preferable to silently downgrading the user's explicit
88 // choice.
89 ReasoningLevel::XHigh => Some(json!({"reasoning_effort": "xhigh"})),
90 // Max collapses to "high" on Effort-shape providers.
91 // OpenAI's Effort enum doesn't have a "max" value (goes
92 // `...high | xhigh` and stops); users wanting OpenAI's
93 // top tier should pick `XHigh` explicitly. Providers
94 // with a genuine "max" tier (Anthropic, OpenRouter) use
95 // their own strategy, not this one.
96 ReasoningLevel::Max => Some(json!({"reasoning_effort": "high"})),
97 },
98 ReasoningStrategy::OpenRouterShape => match level {
99 ReasoningLevel::None => Some(json!({"reasoning": {"exclude": true}})),
100 ReasoningLevel::Minimal => Some(json!({"reasoning": {"effort": "low"}})),
101 ReasoningLevel::Low => Some(json!({"reasoning": {"effort": "low"}})),
102 ReasoningLevel::Medium => Some(json!({"reasoning": {"effort": "medium"}})),
103 ReasoningLevel::High => Some(json!({"reasoning": {"effort": "high"}})),
104 // OpenRouter has no `xhigh` tier. Since XHigh sits between
105 // High and Max, snap DOWN to `high` — the user picked
106 // something above high but below max; giving them max would
107 // over-deliver.
108 ReasoningLevel::XHigh => Some(json!({"reasoning": {"effort": "high"}})),
109 ReasoningLevel::Max => Some(json!({"reasoning": {"effort": "max"}})),
110 },
111 }
112 }
113}
114
/// Location of reasoning text inside a streamed response delta.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReasoningExtraction {
    /// No reasoning text is streamed (OpenAI Chat Completions for
    /// o-series keeps it encrypted server-side).
    None,
    /// Reasoning is carried in `delta.<field>` of each streaming chunk;
    /// the payload is the field name. Typical names are
    /// `"reasoning_content"` (vLLM, DeepInfra, DeepSeek) and
    /// `"reasoning"` (Groq parsed mode, OpenRouter).
    DeltaContentField(&'static str),
    /// Reasoning is embedded in `delta.content` between
    /// `<think>...</think>` tags (Together-R1, Groq raw mode, Fireworks
    /// `/think` suffix). The adapter strips the tags with a streaming
    /// state machine and reroutes the enclosed bytes to the reasoning
    /// channel.
    InlineThinkTags,
}
131
132impl ReasoningExtraction {
133 /// Pull reasoning content out of a streaming delta JSON. Returns
134 /// `None` if this strategy doesn't extract from the JSON body
135 /// (`None` and `InlineThinkTags`) or if the delta has no reasoning.
136 /// `InlineThinkTags` is handled separately at the byte-stream level
137 /// in the adapter; this method returns `None` for it.
138 pub fn parse_delta(&self, delta: &Value) -> Option<ReasoningChunk> {
139 match self {
140 ReasoningExtraction::None | ReasoningExtraction::InlineThinkTags => None,
141 ReasoningExtraction::DeltaContentField(field) => {
142 let text = delta.get(field).and_then(|v| v.as_str())?;
143 if text.is_empty() {
144 None
145 } else {
146 Some(ReasoningChunk {
147 text: text.to_string(),
148 signature: None,
149 })
150 }
151 },
152 }
153 }
154}
155
156/// User-friendly string form for `compat = "..."` in config.toml when a
157/// fully custom provider needs to declare which profile shape to follow.
158#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
159#[serde(rename_all = "kebab-case")]
160pub enum CompatStyle {
161 /// Standard OpenAI Chat Completions shape, no reasoning extras
162 /// (matches Together, DeepInfra, Cerebras for non-gpt-oss models).
163 Openai,
164 /// Same shape but with `reasoning_effort` on requests.
165 OpenaiEffort,
166 /// OpenRouter's normalized reasoning object.
167 Openrouter,
168}
169
170impl CompatStyle {
171 pub fn reasoning_strategy(self) -> ReasoningStrategy {
172 match self {
173 CompatStyle::Openai => ReasoningStrategy::None,
174 CompatStyle::OpenaiEffort => ReasoningStrategy::Effort,
175 CompatStyle::Openrouter => ReasoningStrategy::OpenRouterShape,
176 }
177 }
178}
179
180/// Built-in provider registry. Lookups are case-insensitive on `name`.
181/// Add a provider here when its quirks fit the existing strategies; add
182/// a new `ReasoningStrategy` variant when a provider needs something
183/// the existing ones can't express.
184pub const REGISTRY: &[ProviderProfile] = &[
185 ProviderProfile {
186 name: "openai",
187 base_url: "https://api.openai.com/v1",
188 api_key_env: "OPENAI_API_KEY",
189 extra_headers: &[],
190 reasoning_strategy: ReasoningStrategy::Effort,
191 // Chat Completions doesn't stream reasoning content for o-series
192 // (encrypted server-side); only the Responses API does. Step 2
193 // targets Chat Completions, so None.
194 reasoning_extraction: ReasoningExtraction::None,
195 },
196 ProviderProfile {
197 name: "groq",
198 base_url: "https://api.groq.com/openai/v1",
199 api_key_env: "GROQ_API_KEY",
200 extra_headers: &[],
201 reasoning_strategy: ReasoningStrategy::Effort,
202 // Default `reasoning_format=parsed` routes reasoning to its own
203 // `delta.reasoning` field; we read it from there.
204 reasoning_extraction: ReasoningExtraction::DeltaContentField("reasoning"),
205 },
206 ProviderProfile {
207 name: "openrouter",
208 base_url: "https://openrouter.ai/api/v1",
209 api_key_env: "OPENROUTER_API_KEY",
210 extra_headers: &[
211 ("HTTP-Referer", "https://github.com/noahsabaj/mermaid-cli"),
212 // Canonical attribution header as of April 2026. OpenRouter
213 // still accepts `X-Title` for backward compat, but new code
214 // should emit `X-OpenRouter-Title`.
215 ("X-OpenRouter-Title", "Mermaid"),
216 ],
217 reasoning_strategy: ReasoningStrategy::OpenRouterShape,
218 reasoning_extraction: ReasoningExtraction::DeltaContentField("reasoning"),
219 },
220 ProviderProfile {
221 name: "cerebras",
222 base_url: "https://api.cerebras.ai/v1",
223 api_key_env: "CEREBRAS_API_KEY",
224 extra_headers: &[],
225 // Effort-style request param. `gpt-oss-120b` and `zai-glm-4.7`
226 // honor it (the latter accepts `none` to disable); other models
227 // silently ignore — wire shape is the same.
228 reasoning_strategy: ReasoningStrategy::Effort,
229 reasoning_extraction: ReasoningExtraction::None,
230 },
231 ProviderProfile {
232 name: "deepinfra",
233 base_url: "https://api.deepinfra.com/v1/openai",
234 api_key_env: "DEEPINFRA_API_KEY",
235 extra_headers: &[],
236 // Pass-through; reasoning shape per upstream model. Most R1-style
237 // models on DeepInfra emit `delta.reasoning_content`.
238 reasoning_strategy: ReasoningStrategy::None,
239 reasoning_extraction: ReasoningExtraction::DeltaContentField("reasoning_content"),
240 },
241 ProviderProfile {
242 name: "together",
243 base_url: "https://api.together.xyz/v1",
244 api_key_env: "TOGETHER_API_KEY",
245 extra_headers: &[],
246 reasoning_strategy: ReasoningStrategy::None,
247 // DeepSeek-R1 and friends on Together emit `<think>...</think>`
248 // inside `delta.content`. Adapter strips and reroutes.
249 reasoning_extraction: ReasoningExtraction::InlineThinkTags,
250 },
251];
252
253/// Look up a built-in provider by name. Case-insensitive.
254pub fn lookup_provider(name: &str) -> Option<&'static ProviderProfile> {
255 let lower = name.to_lowercase();
256 REGISTRY.iter().find(|p| p.name == lower)
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `ReasoningLevel` tier that `ReasoningStrategy::render`
    /// matches on, from weakest to strongest.
    fn all_levels() -> [ReasoningLevel; 7] {
        [
            ReasoningLevel::None,
            ReasoningLevel::Minimal,
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
            ReasoningLevel::XHigh,
            ReasoningLevel::Max,
        ]
    }

    // --- Registry lookup ---

    #[test]
    fn lookup_known_provider() {
        let p = lookup_provider("groq").expect("groq is in the registry");
        assert_eq!(p.name, "groq");
        assert!(p.base_url.starts_with("https://api.groq.com"));
        assert_eq!(p.api_key_env, "GROQ_API_KEY");
    }

    #[test]
    fn lookup_is_case_insensitive() {
        assert!(lookup_provider("OpenAI").is_some());
        assert!(lookup_provider("OPENROUTER").is_some());
        // Mixed case must resolve to the same lowercase entry.
        assert_eq!(lookup_provider("gRoQ").map(|p| p.name), Some("groq"));
    }

    #[test]
    fn lookup_unknown_provider() {
        assert!(lookup_provider("does-not-exist").is_none());
        assert!(lookup_provider("").is_none());
    }

    #[test]
    fn registry_has_six_providers() {
        assert_eq!(REGISTRY.len(), 6);
    }

    #[test]
    fn registry_names_are_lowercase_and_unique() {
        // Names are documented as lowercase, and a duplicate would make
        // the later entry unreachable through `lookup_provider`.
        for (idx, profile) in REGISTRY.iter().enumerate() {
            assert_eq!(profile.name, profile.name.to_lowercase());
            assert!(
                REGISTRY[..idx].iter().all(|earlier| earlier.name != profile.name),
                "duplicate registry entry: {}",
                profile.name
            );
        }
    }

    #[test]
    fn registry_base_urls_have_no_trailing_slash() {
        // The adapter appends `/chat/completions` to `base_url`, so a
        // trailing slash would produce a double slash in the path.
        for profile in REGISTRY {
            assert!(
                !profile.base_url.ends_with('/'),
                "base_url for {} ends with '/'",
                profile.name
            );
        }
    }

    #[test]
    fn openrouter_has_analytics_headers() {
        let p = lookup_provider("openrouter").unwrap();
        let names: Vec<&str> = p.extra_headers.iter().map(|(k, _)| *k).collect();
        assert!(names.contains(&"HTTP-Referer"));
        // Canonical header name as of 2026-04. `X-Title` is still
        // accepted for backward compat but new code emits the rebranded
        // version.
        assert!(names.contains(&"X-OpenRouter-Title"));
    }

    // --- ReasoningStrategy::render ---

    #[test]
    fn effort_renders_string_per_level() {
        let s = ReasoningStrategy::Effort;
        // `None` is the explicit off-tier per GPT-5.1+; we emit the
        // string rather than omitting the field so the user's choice
        // reaches the provider.
        assert_eq!(
            s.render(ReasoningLevel::None),
            Some(json!({"reasoning_effort": "none"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Minimal),
            Some(json!({"reasoning_effort": "minimal"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Low),
            Some(json!({"reasoning_effort": "low"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Medium),
            Some(json!({"reasoning_effort": "medium"})),
        );
        assert_eq!(
            s.render(ReasoningLevel::High),
            Some(json!({"reasoning_effort": "high"})),
        );
        // XHigh is the OpenAI GPT-5.2+ tier. It sits between High and
        // Max in our enum, but on the wire it's OpenAI's actual top
        // string. Providers that don't expose xhigh will 400.
        assert_eq!(
            s.render(ReasoningLevel::XHigh),
            Some(json!({"reasoning_effort": "xhigh"})),
        );
        // Max collapses to high because OpenAI's Effort enum has no
        // "max". Users wanting OpenAI's actual top tier should pick
        // XHigh.
        assert_eq!(
            s.render(ReasoningLevel::Max),
            Some(json!({"reasoning_effort": "high"})),
        );
    }

    #[test]
    fn openrouter_shape_renders_nested_object() {
        let s = ReasoningStrategy::OpenRouterShape;
        // None means "exclude" on OpenRouter: explicitly suppress
        // reasoning rather than fall through to the model default.
        assert_eq!(
            s.render(ReasoningLevel::None),
            Some(json!({"reasoning": {"exclude": true}})),
        );
        // Minimal has no tier of its own here and shares `low`.
        assert_eq!(
            s.render(ReasoningLevel::Minimal),
            Some(json!({"reasoning": {"effort": "low"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Low),
            Some(json!({"reasoning": {"effort": "low"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Medium),
            Some(json!({"reasoning": {"effort": "medium"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::High),
            Some(json!({"reasoning": {"effort": "high"}})),
        );
        // OpenRouter has no xhigh tier; XHigh (between High and Max)
        // snaps DOWN to `high` so we don't over-deliver by bumping to
        // max.
        assert_eq!(
            s.render(ReasoningLevel::XHigh),
            Some(json!({"reasoning": {"effort": "high"}})),
        );
        assert_eq!(
            s.render(ReasoningLevel::Max),
            Some(json!({"reasoning": {"effort": "max"}})),
        );
    }

    #[test]
    fn none_strategy_renders_nothing() {
        let s = ReasoningStrategy::None;
        // Covers every tier, including Minimal and XHigh.
        for level in all_levels() {
            assert_eq!(s.render(level), None);
        }
    }

    // --- ReasoningExtraction::parse_delta ---

    #[test]
    fn delta_field_extraction_finds_named_field() {
        let e = ReasoningExtraction::DeltaContentField("reasoning_content");
        let delta = json!({"reasoning_content": "weighing options", "content": ""});
        let chunk = e.parse_delta(&delta).expect("should extract");
        assert_eq!(chunk.text, "weighing options");
        assert!(chunk.signature.is_none());
    }

    #[test]
    fn delta_field_extraction_returns_none_when_absent() {
        let e = ReasoningExtraction::DeltaContentField("reasoning_content");
        let delta = json!({"content": "regular text"});
        assert!(e.parse_delta(&delta).is_none());
    }

    #[test]
    fn delta_field_extraction_returns_none_for_empty_string() {
        let e = ReasoningExtraction::DeltaContentField("reasoning");
        let delta = json!({"reasoning": ""});
        assert!(e.parse_delta(&delta).is_none());
    }

    #[test]
    fn delta_field_extraction_returns_none_for_non_string() {
        // A field that is present but not a JSON string (null, number,
        // object) carries no reasoning text.
        let e = ReasoningExtraction::DeltaContentField("reasoning");
        assert!(e.parse_delta(&json!({"reasoning": null})).is_none());
        assert!(e.parse_delta(&json!({"reasoning": 42})).is_none());
        assert!(e.parse_delta(&json!({"reasoning": {"text": "x"}})).is_none());
    }

    #[test]
    fn none_extraction_always_returns_none() {
        let e = ReasoningExtraction::None;
        assert!(e.parse_delta(&json!({"reasoning_content": "x"})).is_none());
    }

    #[test]
    fn inline_think_tags_does_not_parse_via_json() {
        // Inline tags are handled at the byte-stream level in the
        // adapter (Wave 6); this method always returns None for them.
        let e = ReasoningExtraction::InlineThinkTags;
        assert!(
            e.parse_delta(&json!({"content": "<think>x</think>"}))
                .is_none()
        );
    }

    // --- CompatStyle ---

    #[test]
    fn compat_style_maps_to_strategy() {
        assert_eq!(
            CompatStyle::Openai.reasoning_strategy(),
            ReasoningStrategy::None
        );
        assert_eq!(
            CompatStyle::OpenaiEffort.reasoning_strategy(),
            ReasoningStrategy::Effort
        );
        assert_eq!(
            CompatStyle::Openrouter.reasoning_strategy(),
            ReasoningStrategy::OpenRouterShape
        );
    }
}