mermaid_cli/models/reasoning.rs
1//! Reasoning depth abstraction for multi-provider model adapters.
2//!
3//! All major LLM providers have converged on string-enum reasoning effort
4//! controls (OpenAI `reasoning_effort`, Anthropic adaptive `effort`,
5//! Gemini 3 `thinking_level`, Groq, Fireworks, vLLM-on-gpt-oss, OpenRouter
6//! normalized). Numeric token-budget knobs (legacy Anthropic `budget_tokens`,
7//! legacy Gemini `thinking_budget`) are being phased out.
8//!
//! Mermaid exposes a single `ReasoningLevel` enum at the user surface; each
//! adapter maps it to provider-native shapes via dedicated functions. The
//! enum shape mirrors OpenAI Codex's `ReasoningEffort` (the closest Rust
//! prior art), extended with a `Max` tier above `XHigh`. Seven variants
//! beat three because Anthropic and OpenAI's frontier models both ship the
//! extreme tiers and the `None` / `Minimal` distinction matters for
//! cost-conscious workloads.
16
17use clap::ValueEnum;
18use serde::{Deserialize, Serialize};
19
20/// Reasoning depth requested for a model call.
21///
22/// `Default` is `Medium` — matches OpenAI's `reasoning_effort` default and
23/// is the level that produces useful chain-of-thought without burning
24/// excessive latency / tokens for routine prompts.
25///
26/// String form is lowercase (`"none"`, `"minimal"`, `"low"`, `"medium"`,
27/// `"high"`, `"max"`) so config files and CLI flags read naturally.
28/// `ValueEnum` is derived so clap can parse `--reasoning <level>`
29/// directly from the user-facing strings — no custom parser layer.
30#[derive(
31 Debug,
32 Clone,
33 Copy,
34 PartialEq,
35 Eq,
36 Hash,
37 PartialOrd,
38 Ord,
39 Serialize,
40 Deserialize,
41 Default,
42 ValueEnum,
43)]
44#[serde(rename_all = "lowercase")]
45#[value(rename_all = "lowercase")]
46pub enum ReasoningLevel {
47 /// Reasoning explicitly disabled. On providers that always reason
48 /// (Grok 4, deepseek-reasoner direct), maps to "lowest available".
49 None,
50 /// Minimum reasoning the provider exposes. OpenAI GPT-5 calls this
51 /// `minimal`; absent on most other providers (collapses to `None` or
52 /// `Low`).
53 Minimal,
54 /// Light reasoning for simple multi-step prompts.
55 Low,
56 /// The default. Useful chain-of-thought without excessive cost.
57 #[default]
58 Medium,
59 /// Heavy reasoning for hard prompts.
60 High,
61 /// Between `High` and `Max` — OpenAI GPT-5.2+ `xhigh`, Anthropic Opus 4.7
62 /// `xhigh` (gated per-model). A step up from `High` for providers that
63 /// expose a dedicated `xhigh` tier. Providers without it snap down via
64 /// `nearest_effort` to `High`.
65 XHigh,
66 /// Maximum reasoning the model exposes. Anthropic's adaptive `max`,
67 /// OpenRouter's `max`. On OpenAI Chat Completions Effort this collapses
68 /// to `high` (no `max` tier in that shape); on Gemini 3 collapses to
69 /// `high` (no `max` tier there either).
70 Max,
71}
72
73impl ReasoningLevel {
74 /// Ordering rank, used by `nearest_effort()`. `None < Minimal < Low <
75 /// Medium < High < Max`.
76 fn rank(self) -> u8 {
77 match self {
78 ReasoningLevel::None => 0,
79 ReasoningLevel::Minimal => 1,
80 ReasoningLevel::Low => 2,
81 ReasoningLevel::Medium => 3,
82 ReasoningLevel::High => 4,
83 // XHigh sits between High and Max (one provider-specific tier
84 // above High, below the provider's nominal "max" where one
85 // exists). OpenRouter-style "max" is strictly higher.
86 ReasoningLevel::XHigh => 5,
87 ReasoningLevel::Max => 6,
88 }
89 }
90
91 /// Lowercase string form (matches the serde representation). Provided
92 /// as a method so call sites that want a `&'static str` don't have to
93 /// round-trip through serde.
94 pub fn as_str(self) -> &'static str {
95 match self {
96 ReasoningLevel::None => "none",
97 ReasoningLevel::Minimal => "minimal",
98 ReasoningLevel::Low => "low",
99 ReasoningLevel::Medium => "medium",
100 ReasoningLevel::High => "high",
101 ReasoningLevel::Max => "max",
102 ReasoningLevel::XHigh => "xhigh",
103 }
104 }
105}
106
107/// Describes the reasoning controls a model exposes.
108///
109/// Adapters declare this via `ModelCapabilities::supports_reasoning`.
110/// The user-facing `ReasoningLevel` is mapped onto whatever shape the
111/// model actually accepts via `nearest_effort()`.
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub enum ReasoningCapability {
114 /// Model has no reasoning controls. Any `ReasoningLevel` is silently
115 /// ignored by the adapter.
116 Unsupported,
117 /// Model has on/off reasoning only (most Ollama-hosted models like
118 /// deepseek-r1, qwen3, kimi-k2-thinking — `think: true/false`).
119 /// `ReasoningLevel::None` → off; anything else → on.
120 Binary,
121 /// Model exposes a discrete enum of supported levels (gpt-oss,
122 /// OpenAI o-series + GPT-5, Anthropic adaptive thinking, Groq,
123 /// Fireworks, vLLM-on-gpt-oss). `nearest_effort()` maps the
124 /// requested level onto this set.
125 Levels(Vec<ReasoningLevel>),
126 /// Model expects a numeric token budget (legacy Anthropic
127 /// `budget_tokens`, legacy Gemini 2.5 `thinking_budget`). Adapters
128 /// translate `ReasoningLevel` onto a value in `[min, max]`.
129 Budget { min: usize, max: usize },
130}
131
/// One typed piece of reasoning output produced while a response streams.
///
/// This supersedes the in-band `"Thinking..."` / `"...done thinking."`
/// text markers that the legacy text callback relied on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReasoningChunk {
    /// The reasoning text carried by this chunk.
    pub text: String,
    /// Reserved for providers that emit verifiable thinking traces
    /// (Anthropic's `signature` field, OpenAI's `encrypted_content`).
    /// `None` for now.
    pub signature: Option<String>,
}
143
144/// Map a requested reasoning level onto the closest level the model
145/// actually supports.
146///
147/// Algorithm (lifted from OpenAI Codex's `nearest_effort`):
148/// 1. If the model has no supported levels, return `None` — caller
149/// decides whether to error or silently disable reasoning.
150/// 2. Prefer the highest supported level whose rank is ≤ requested.
151/// This is the "graceful downgrade" semantic Claude Code uses
152/// ("falls back to the highest supported level at or below the one
153/// you set").
154/// 3. If every supported level is above the requested rank, fall back
155/// to the lowest supported level. Better to honor the user's intent
156/// to enable reasoning than to silently disable it.
157pub fn nearest_effort(
158 requested: ReasoningLevel,
159 supported: &[ReasoningLevel],
160) -> Option<ReasoningLevel> {
161 if supported.is_empty() {
162 return None;
163 }
164
165 let target = requested.rank();
166 let at_or_below = supported.iter().filter(|l| l.rank() <= target).max();
167 if let Some(level) = at_or_below {
168 return Some(*level);
169 }
170
171 // No level at or below the request — return the lowest supported.
172 supported.iter().min().copied()
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant, listed from least to most reasoning effort.
    const LADDER: [ReasoningLevel; 7] = [
        ReasoningLevel::None,
        ReasoningLevel::Minimal,
        ReasoningLevel::Low,
        ReasoningLevel::Medium,
        ReasoningLevel::High,
        ReasoningLevel::XHigh,
        ReasoningLevel::Max,
    ];

    #[test]
    fn reasoning_level_default_is_medium() {
        assert_eq!(ReasoningLevel::default(), ReasoningLevel::Medium);
    }

    /// Each level survives a JSON round-trip, and its serialized form is
    /// exactly what `as_str()` reports.
    #[test]
    fn reasoning_level_serde_roundtrip() {
        for level in LADDER {
            let json = serde_json::to_string(&level).unwrap();
            let back: ReasoningLevel = serde_json::from_str(&json).unwrap();
            assert_eq!(level, back);
            assert_eq!(json.trim_matches('"'), level.as_str());
        }
    }

    /// The derived `Ord` (declaration order) and the hand-written `rank()`
    /// must describe the same ladder, because `nearest_effort()` uses both.
    #[test]
    fn reasoning_level_ord_matches_rank() {
        // Adjacent rungs are strictly increasing; in particular `XHigh`
        // sits strictly between `High` and `Max`.
        for (lower, higher) in LADDER.iter().zip(LADDER.iter().skip(1)) {
            assert!(lower < higher, "{lower:?} should sort below {higher:?}");
        }

        // `rank()` agrees with `Ord` for every pair of levels.
        for a in LADDER {
            for b in LADDER {
                assert_eq!(
                    a.cmp(&b),
                    a.rank().cmp(&b.rank()),
                    "derived Ord and rank() disagree for {a:?} vs {b:?}",
                );
            }
        }
    }

    #[test]
    fn nearest_effort_empty_returns_none() {
        assert_eq!(nearest_effort(ReasoningLevel::Medium, &[]), None);
    }

    #[test]
    fn nearest_effort_exact_match() {
        let supported = [
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::Medium, &supported),
            Some(ReasoningLevel::Medium),
        );
    }

    /// When every level is supported, each request maps to itself.
    #[test]
    fn nearest_effort_identity_when_all_levels_supported() {
        for level in LADDER {
            assert_eq!(nearest_effort(level, &LADDER), Some(level));
        }
    }

    #[test]
    fn nearest_effort_downgrades_to_highest_at_or_below() {
        // Requested High, supported only Low + Medium → Medium.
        let supported = [ReasoningLevel::Low, ReasoningLevel::Medium];
        assert_eq!(
            nearest_effort(ReasoningLevel::High, &supported),
            Some(ReasoningLevel::Medium),
        );
    }

    #[test]
    fn nearest_effort_upgrades_when_all_above_request() {
        // Requested None, supported only Medium + High → Medium (lowest).
        // Better to honor "enable reasoning" intent than silently disable.
        let supported = [ReasoningLevel::Medium, ReasoningLevel::High];
        assert_eq!(
            nearest_effort(ReasoningLevel::None, &supported),
            Some(ReasoningLevel::Medium),
        );
    }

    #[test]
    fn nearest_effort_max_request_with_lower_ceiling() {
        // Requested Max, supported up through High → High.
        let supported = [
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::Max, &supported),
            Some(ReasoningLevel::High),
        );
    }

    #[test]
    fn nearest_effort_xhigh_snaps_down_to_high() {
        // Requested XHigh on a model without an `xhigh` tier → High.
        let supported = [
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::XHigh, &supported),
            Some(ReasoningLevel::High),
        );
    }

    #[test]
    fn nearest_effort_max_request_uses_xhigh_when_supported() {
        // Requested Max, model tops out at XHigh → XHigh, not High.
        let supported = [
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
            ReasoningLevel::High,
            ReasoningLevel::XHigh,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::Max, &supported),
            Some(ReasoningLevel::XHigh),
        );
    }

    #[test]
    fn nearest_effort_ignores_supported_order() {
        // The supported slice is deliberately unsorted.
        let supported = [
            ReasoningLevel::High,
            ReasoningLevel::Low,
            ReasoningLevel::Medium,
        ];
        assert_eq!(
            nearest_effort(ReasoningLevel::Medium, &supported),
            Some(ReasoningLevel::Medium),
        );
        // Nothing at or below Minimal → lowest supported, wherever it sits.
        assert_eq!(
            nearest_effort(ReasoningLevel::Minimal, &supported),
            Some(ReasoningLevel::Low),
        );
    }

    #[test]
    fn reasoning_chunk_construction() {
        let chunk = ReasoningChunk {
            text: "thinking through the problem".to_string(),
            signature: None,
        };
        assert_eq!(chunk.text, "thinking through the problem");
        assert!(chunk.signature.is_none());
    }

    /// `ValueEnum` derive lets clap parse `--reasoning <level>` directly.
    /// Verifies all 7 lowercase strings round-trip through the parser,
    /// which protects against `serde rename_all` accidentally drifting
    /// from `value rename_all`.
    #[test]
    fn clap_value_enum_parses_all_seven_levels() {
        use clap::ValueEnum as _;
        for (s, expected) in [
            ("none", ReasoningLevel::None),
            ("minimal", ReasoningLevel::Minimal),
            ("low", ReasoningLevel::Low),
            ("medium", ReasoningLevel::Medium),
            ("high", ReasoningLevel::High),
            ("xhigh", ReasoningLevel::XHigh),
            ("max", ReasoningLevel::Max),
        ] {
            let parsed = ReasoningLevel::from_str(s, true).expect(s);
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn clap_value_enum_rejects_unknown_levels() {
        use clap::ValueEnum as _;
        assert!(ReasoningLevel::from_str("foobar", true).is_err());
        // Surrounding whitespace is not trimmed by the parser.
        assert!(ReasoningLevel::from_str("medium ", true).is_err());
    }
}