Skip to main content

mermaid_cli/models/
reasoning.rs

1//! Reasoning depth abstraction for multi-provider model adapters.
2//!
3//! All major LLM providers have converged on string-enum reasoning effort
4//! controls (OpenAI `reasoning_effort`, Anthropic adaptive `effort`,
5//! Gemini 3 `thinking_level`, Groq, Fireworks, vLLM-on-gpt-oss, OpenRouter
6//! normalized). Numeric token-budget knobs (legacy Anthropic `budget_tokens`,
7//! legacy Gemini `thinking_budget`) are being phased out.
8//!
//! Mermaid exposes a single `ReasoningLevel` enum at the user surface; each
//! adapter maps it to provider-native shapes via dedicated functions. The
//! enum shape mirrors OpenAI Codex's `ReasoningEffort` (the closest Rust
//! prior art), with a `Max` tier added above `XHigh` for providers that
//! expose one. Seven variants beat three because Anthropic and OpenAI's
//! frontier models both ship the extreme tiers and the `None` / `Minimal`
//! distinction matters for cost-conscious workloads.
16
17use clap::ValueEnum;
18use serde::{Deserialize, Serialize};
19
20/// Reasoning depth requested for a model call.
21///
22/// `Default` is `Medium` — matches OpenAI's `reasoning_effort` default and
23/// is the level that produces useful chain-of-thought without burning
24/// excessive latency / tokens for routine prompts.
25///
26/// String form is lowercase (`"none"`, `"minimal"`, `"low"`, `"medium"`,
27/// `"high"`, `"max"`) so config files and CLI flags read naturally.
28/// `ValueEnum` is derived so clap can parse `--reasoning <level>`
29/// directly from the user-facing strings — no custom parser layer.
30#[derive(
31    Debug,
32    Clone,
33    Copy,
34    PartialEq,
35    Eq,
36    Hash,
37    PartialOrd,
38    Ord,
39    Serialize,
40    Deserialize,
41    Default,
42    ValueEnum,
43)]
44#[serde(rename_all = "lowercase")]
45#[value(rename_all = "lowercase")]
46pub enum ReasoningLevel {
47    /// Reasoning explicitly disabled. On providers that always reason
48    /// (Grok 4, deepseek-reasoner direct), maps to "lowest available".
49    None,
50    /// Minimum reasoning the provider exposes. OpenAI GPT-5 calls this
51    /// `minimal`; absent on most other providers (collapses to `None` or
52    /// `Low`).
53    Minimal,
54    /// Light reasoning for simple multi-step prompts.
55    Low,
56    /// The default. Useful chain-of-thought without excessive cost.
57    #[default]
58    Medium,
59    /// Heavy reasoning for hard prompts.
60    High,
61    /// Between `High` and `Max` — OpenAI GPT-5.2+ `xhigh`, Anthropic Opus 4.7
62    /// `xhigh` (gated per-model). A step up from `High` for providers that
63    /// expose a dedicated `xhigh` tier. Providers without it snap down via
64    /// `nearest_effort` to `High`.
65    XHigh,
66    /// Maximum reasoning the model exposes. Anthropic's adaptive `max`,
67    /// OpenRouter's `max`. On OpenAI Chat Completions Effort this collapses
68    /// to `high` (no `max` tier in that shape); on Gemini 3 collapses to
69    /// `high` (no `max` tier there either).
70    Max,
71}
72
73impl ReasoningLevel {
74    /// Ordering rank, used by `nearest_effort()`. `None < Minimal < Low <
75    /// Medium < High < Max`.
76    fn rank(self) -> u8 {
77        match self {
78            ReasoningLevel::None => 0,
79            ReasoningLevel::Minimal => 1,
80            ReasoningLevel::Low => 2,
81            ReasoningLevel::Medium => 3,
82            ReasoningLevel::High => 4,
83            // XHigh sits between High and Max (one provider-specific tier
84            // above High, below the provider's nominal "max" where one
85            // exists). OpenRouter-style "max" is strictly higher.
86            ReasoningLevel::XHigh => 5,
87            ReasoningLevel::Max => 6,
88        }
89    }
90
91    /// Lowercase string form (matches the serde representation). Provided
92    /// as a method so call sites that want a `&'static str` don't have to
93    /// round-trip through serde.
94    pub fn as_str(self) -> &'static str {
95        match self {
96            ReasoningLevel::None => "none",
97            ReasoningLevel::Minimal => "minimal",
98            ReasoningLevel::Low => "low",
99            ReasoningLevel::Medium => "medium",
100            ReasoningLevel::High => "high",
101            ReasoningLevel::Max => "max",
102            ReasoningLevel::XHigh => "xhigh",
103        }
104    }
105}
106
107/// Describes the reasoning controls a model exposes.
108///
109/// Adapters declare this via `ModelCapabilities::supports_reasoning`.
110/// The user-facing `ReasoningLevel` is mapped onto whatever shape the
111/// model actually accepts via `nearest_effort()`.
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub enum ReasoningCapability {
114    /// Model has no reasoning controls. Any `ReasoningLevel` is silently
115    /// ignored by the adapter.
116    Unsupported,
117    /// Model has on/off reasoning only (most Ollama-hosted models like
118    /// deepseek-r1, qwen3, kimi-k2-thinking — `think: true/false`).
119    /// `ReasoningLevel::None` → off; anything else → on.
120    Binary,
121    /// Model exposes a discrete enum of supported levels (gpt-oss,
122    /// OpenAI o-series + GPT-5, Anthropic adaptive thinking, Groq,
123    /// Fireworks, vLLM-on-gpt-oss). `nearest_effort()` maps the
124    /// requested level onto this set.
125    Levels(Vec<ReasoningLevel>),
126    /// Model expects a numeric token budget (legacy Anthropic
127    /// `budget_tokens`, legacy Gemini 2.5 `thinking_budget`). Adapters
128    /// translate `ReasoningLevel` onto a value in `[min, max]`.
129    Budget { min: usize, max: usize },
130}
131
/// One typed piece of reasoning output delivered while a response streams.
///
/// Supersedes the in-band `"Thinking..."` / `"...done thinking."` text
/// markers that the legacy text callback relied on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReasoningChunk {
    /// The reasoning text carried by this chunk.
    pub text: String,
    /// Reserved for providers that emit verifiable thinking traces
    /// (Anthropic's `signature` field, OpenAI's `encrypted_content`);
    /// `None` for now.
    pub signature: Option<String>,
}
143
144/// Map a requested reasoning level onto the closest level the model
145/// actually supports.
146///
147/// Algorithm (lifted from OpenAI Codex's `nearest_effort`):
148///   1. If the model has no supported levels, return `None` — caller
149///      decides whether to error or silently disable reasoning.
150///   2. Prefer the highest supported level whose rank is ≤ requested.
151///      This is the "graceful downgrade" semantic Claude Code uses
152///      ("falls back to the highest supported level at or below the one
153///      you set").
154///   3. If every supported level is above the requested rank, fall back
155///      to the lowest supported level. Better to honor the user's intent
156///      to enable reasoning than to silently disable it.
157pub fn nearest_effort(
158    requested: ReasoningLevel,
159    supported: &[ReasoningLevel],
160) -> Option<ReasoningLevel> {
161    if supported.is_empty() {
162        return None;
163    }
164
165    let target = requested.rank();
166    let at_or_below = supported.iter().filter(|l| l.rank() <= target).max();
167    if let Some(level) = at_or_below {
168        return Some(*level);
169    }
170
171    // No level at or below the request — return the lowest supported.
172    supported.iter().min().copied()
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant, listed from least to most reasoning effort.
    const ASCENDING: [ReasoningLevel; 7] = [
        ReasoningLevel::None,
        ReasoningLevel::Minimal,
        ReasoningLevel::Low,
        ReasoningLevel::Medium,
        ReasoningLevel::High,
        ReasoningLevel::XHigh,
        ReasoningLevel::Max,
    ];

    #[test]
    fn reasoning_level_default_is_medium() {
        assert_eq!(ReasoningLevel::default(), ReasoningLevel::Medium);
    }

    /// Each level survives a JSON round-trip and serializes to exactly the
    /// string `as_str()` reports.
    #[test]
    fn reasoning_level_serde_roundtrip() {
        for level in ASCENDING {
            let json = serde_json::to_string(&level).unwrap();
            let back: ReasoningLevel = serde_json::from_str(&json).unwrap();
            assert_eq!(level, back);
            assert_eq!(json.trim_matches('"'), level.as_str());
        }
    }

    /// The derived `Ord` (declaration order) and the hand-written `rank()`
    /// must agree, because `nearest_effort` relies on both.
    #[test]
    fn reasoning_level_ord_matches_rank() {
        for pair in ASCENDING.windows(2) {
            let (lower, higher) = (pair[0], pair[1]);
            assert!(
                lower < higher,
                "derived Ord: {lower:?} should sort below {higher:?}"
            );
            assert!(
                lower.rank() < higher.rank(),
                "rank(): {lower:?} should rank below {higher:?}"
            );
        }
    }

    #[test]
    fn nearest_effort_empty_returns_none() {
        assert_eq!(nearest_effort(ReasoningLevel::Medium, &[]), None);
    }

    #[test]
    fn nearest_effort_exact_match() {
        let supported = [
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::Medium, &supported),
            Some(ReasoningLevel::Medium),
        );
    }

    #[test]
    fn nearest_effort_downgrades_to_highest_at_or_below() {
        // Requested High, supported only Low + Medium → Medium.
        let supported = [ReasoningLevel::Low, ReasoningLevel::Medium];
        assert_eq!(
            nearest_effort(ReasoningLevel::High, &supported),
            Some(ReasoningLevel::Medium),
        );
    }

    #[test]
    fn nearest_effort_upgrades_when_all_above_request() {
        // Requested None, supported only Medium + High → Medium (lowest).
        // Better to honor "enable reasoning" intent than silently disable.
        let supported = [ReasoningLevel::Medium, ReasoningLevel::High];
        assert_eq!(
            nearest_effort(ReasoningLevel::None, &supported),
            Some(ReasoningLevel::Medium),
        );
    }

    #[test]
    fn nearest_effort_max_request_with_lower_ceiling() {
        // Requested Max, supported up through High → High.
        let supported = [
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::Max, &supported),
            Some(ReasoningLevel::High),
        );
    }

    #[test]
    fn nearest_effort_xhigh_request_snaps_down_to_high() {
        // Requested XHigh on a provider with no xhigh tier → High.
        let supported = [
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::XHigh, &supported),
            Some(ReasoningLevel::High),
        );
    }

    #[test]
    fn nearest_effort_max_request_prefers_xhigh_when_available() {
        // Requested Max, provider tops out at XHigh → XHigh, not High.
        let supported = [ReasoningLevel::High, ReasoningLevel::XHigh];
        assert_eq!(
            nearest_effort(ReasoningLevel::Max, &supported),
            Some(ReasoningLevel::XHigh),
        );
    }

    #[test]
    fn nearest_effort_upgrades_to_xhigh_when_it_is_the_floor() {
        // Requested High, supported only XHigh + Max → XHigh (lowest).
        let supported = [ReasoningLevel::XHigh, ReasoningLevel::Max];
        assert_eq!(
            nearest_effort(ReasoningLevel::High, &supported),
            Some(ReasoningLevel::XHigh),
        );
    }

    #[test]
    fn nearest_effort_ignores_supported_order() {
        // The supported slice need not be sorted.
        let supported = [
            ReasoningLevel::High,
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::Max, &supported),
            Some(ReasoningLevel::High),
        );
        assert_eq!(
            nearest_effort(ReasoningLevel::None, &supported),
            Some(ReasoningLevel::Low),
        );
    }

    #[test]
    fn reasoning_chunk_construction() {
        let chunk = ReasoningChunk {
            text: "thinking through the problem".to_string(),
            signature: None,
        };
        assert_eq!(chunk.text, "thinking through the problem");
        assert!(chunk.signature.is_none());
    }

    /// `ValueEnum` derive lets clap parse `--reasoning <level>` directly.
    /// Verifies all 7 lowercase strings round-trip through the parser
    /// — protects against accidental rename in `serde rename_all`
    /// drifting from `value rename_all`.
    #[test]
    fn clap_value_enum_parses_all_seven_levels() {
        use clap::ValueEnum as _;
        for (s, expected) in [
            ("none", ReasoningLevel::None),
            ("minimal", ReasoningLevel::Minimal),
            ("low", ReasoningLevel::Low),
            ("medium", ReasoningLevel::Medium),
            ("high", ReasoningLevel::High),
            ("xhigh", ReasoningLevel::XHigh),
            ("max", ReasoningLevel::Max),
        ] {
            let parsed = ReasoningLevel::from_str(s, true).expect(s);
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn clap_value_enum_rejects_unknown_levels() {
        use clap::ValueEnum as _;
        assert!(ReasoningLevel::from_str("foobar", true).is_err());
        assert!(ReasoningLevel::from_str("medium ", true).is_err());
    }
}
303}