mermaid_cli/models/
config.rs

1//! Unified configuration system for models and backends
2//!
3//! Replaces the fragmented app::Config + models::ModelConfig split
4//! with a single, coherent, backend-agnostic configuration structure.
5
6use crate::constants::{DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE};
7use crate::models::reasoning::ReasoningLevel;
8use crate::prompts;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12/// Unified model configuration
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct ModelConfig {
15    /// Model identifier (provider/model or just model name)
16    /// Examples: "ollama/qwen3-coder:30b", "qwen3-coder:30b", "gpt-4"
17    pub model: String,
18
19    /// Temperature (0.0-2.0, controls randomness)
20    #[serde(default = "default_temperature")]
21    pub temperature: f32,
22
23    /// Maximum tokens to generate
24    #[serde(default = "default_max_tokens")]
25    pub max_tokens: usize,
26
27    /// System prompt override (None = use default)
28    pub system_prompt: Option<String>,
29
30    /// Project-specific instructions appended to the system prompt
31    /// (Step 5h: MERMAID.md content). Runtime-only — never persisted.
32    /// On Anthropic, this gets its own `cache_control` block so the
33    /// static base stays cached even when the dynamic suffix changes.
34    /// On other adapters, it's concatenated onto the system prompt
35    /// with a `---` separator.
36    #[serde(skip)]
37    pub dynamic_system_suffix: Option<String>,
38
39    /// Requested reasoning depth. Adapters map this to provider-native
40    /// shapes via `nearest_effort()` against `ModelCapabilities
41    /// ::supports_reasoning`. Defaults to `Medium` — the OpenAI / Anthropic
42    /// / Gemini default and the level that produces useful chain-of-thought
43    /// without burning excessive latency for routine prompts.
44    #[serde(default)]
45    pub reasoning: ReasoningLevel,
46
47    /// Hide reasoning traces from the user-facing stream while still
48    /// allowing the model to reason server-side. Maps to Ollama's
49    /// `--hidethinking` semantics and Anthropic's `thinking.display:
50    /// "hidden"`. Internal plumbing; the reducer currently never
51    /// sets this (no UI toggle) but the adapter pipeline honors it
52    /// when a future toggle lands.
53    #[serde(default)]
54    pub hide_reasoning_trace: bool,
55
56    /// Backend-specific options (provider name -> key/value pairs)
57    /// Example: {"ollama": {"num_gpu": "10", "num_ctx": "8192"}}
58    #[serde(default)]
59    pub backend_options: HashMap<String, HashMap<String, String>>,
60
61    /// Tool definitions the model sees, already translated into
62    /// OpenAI-compatible `{type: "function", function: {name,
63    /// description, parameters}}` shape. Runtime-only. Populated by
64    /// provider wrappers from `ChatRequest.tools` — adapters iterate
65    /// this directly, no internal registry.
66    #[serde(skip)]
67    pub tools: Vec<serde_json::Value>,
68}
69
70impl Default for ModelConfig {
71    fn default() -> Self {
72        Self {
73            // Intentionally empty — every real construction goes through
74            // a provider wrapper that sets `model` immediately.
75            model: String::new(),
76            temperature: default_temperature(),
77            max_tokens: default_max_tokens(),
78            system_prompt: Some(prompts::get_system_prompt()),
79            dynamic_system_suffix: None,
80            reasoning: ReasoningLevel::default(),
81            hide_reasoning_trace: false,
82            backend_options: HashMap::new(),
83            tools: Vec::new(),
84        }
85    }
86}
87
88impl ModelConfig {
89    /// Get a backend-specific option
90    pub fn get_backend_option(&self, backend: &str, key: &str) -> Option<&String> {
91        self.backend_options.get(backend)?.get(key)
92    }
93
94    /// Get backend option as integer
95    pub fn get_backend_option_i32(&self, backend: &str, key: &str) -> Option<i32> {
96        self.get_backend_option(backend, key)?.parse::<i32>().ok()
97    }
98
99    /// Get backend option as boolean
100    pub fn get_backend_option_bool(&self, backend: &str, key: &str) -> Option<bool> {
101        self.get_backend_option(backend, key)?.parse::<bool>().ok()
102    }
103
104    /// Set a backend-specific option
105    pub fn set_backend_option(&mut self, backend: String, key: String, value: String) {
106        self.backend_options
107            .entry(backend)
108            .or_default()
109            .insert(key, value);
110    }
111
112    /// Build the system-prompt string for adapters that don't support
113    /// per-block cache control (Gemini, OpenAI-compat, Ollama). Joins
114    /// the static base and the dynamic suffix (MERMAID.md content)
115    /// with a `---` separator. Anthropic's adapter doesn't use this
116    /// helper — it emits two separately-cached typed-text blocks.
117    ///
118    /// Returns `None` only when both fields are empty/unset.
119    pub fn combined_system_prompt(&self) -> Option<String> {
120        match (
121            self.system_prompt.as_deref(),
122            self.dynamic_system_suffix.as_deref(),
123        ) {
124            (Some(s), Some(suffix)) if !s.is_empty() && !suffix.is_empty() => {
125                Some(format!("{}\n\n---\n\n{}", s, suffix))
126            },
127            (Some(s), _) if !s.is_empty() => Some(s.to_string()),
128            (_, Some(suffix)) if !suffix.is_empty() => Some(suffix.to_string()),
129            _ => None,
130        }
131    }
132
133    /// Build a ModelConfig from user-facing app Config for a given model ID.
134    ///
135    /// Centralizes the wiring of temperature, max_tokens, reasoning level,
136    /// and Ollama hardware options that was previously scattered across
137    /// orchestrator.rs and model.rs.
138    ///
139    /// Reasoning resolution: per-model preference
140    /// (`config.reasoning_per_model[model_id]`) wins, then falls back to
141    /// the global `default_model.reasoning`. Set per-model via Alt+T or
142    /// `/reasoning <level>` while using the model in question.
143    pub fn from_app_config(config: &crate::app::Config, model_id: &str) -> Self {
144        let reasoning = config
145            .reasoning_per_model
146            .get(model_id)
147            .copied()
148            .unwrap_or(config.default_model.reasoning);
149        let mut mc = Self {
150            model: model_id.to_string(),
151            temperature: config.default_model.temperature,
152            max_tokens: config.default_model.max_tokens,
153            reasoning,
154            ..Self::default()
155        };
156        if let Some(v) = config.ollama.num_gpu {
157            mc.set_backend_option("ollama".into(), "num_gpu".into(), v.to_string());
158        }
159        if let Some(v) = config.ollama.num_ctx {
160            mc.set_backend_option("ollama".into(), "num_ctx".into(), v.to_string());
161        }
162        if let Some(v) = config.ollama.num_thread {
163            mc.set_backend_option("ollama".into(), "num_thread".into(), v.to_string());
164        }
165        if let Some(v) = config.ollama.numa {
166            mc.set_backend_option("ollama".into(), "numa".into(), v.to_string());
167        }
168        mc
169    }
170
171    /// Extract Ollama-specific options
172    pub fn ollama_options(&self) -> OllamaOptions {
173        OllamaOptions {
174            num_gpu: self.get_backend_option_i32("ollama", "num_gpu"),
175            num_thread: self.get_backend_option_i32("ollama", "num_thread"),
176            num_ctx: self.get_backend_option_i32("ollama", "num_ctx"),
177            numa: self.get_backend_option_bool("ollama", "numa"),
178        }
179    }
180}
181
/// Typed view of the Ollama entries held in `ModelConfig::backend_options`.
///
/// Produced by `ModelConfig::ollama_options`. Each field is `None` when the
/// corresponding `"ollama"` entry is missing or could not be parsed.
///
/// Derives `PartialEq`/`Eq` so extracted options can be compared directly,
/// for example in `assert_eq!`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OllamaOptions {
    /// Parsed `num_gpu` entry.
    pub num_gpu: Option<i32>,
    /// Parsed `num_thread` entry.
    pub num_thread: Option<i32>,
    /// Parsed `num_ctx` entry.
    pub num_ctx: Option<i32>,
    /// Parsed `numa` entry.
    pub numa: Option<bool>,
}
190
191/// Backend connection configuration
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct BackendConfig {
194    /// Ollama server URL (default: http://localhost:11434)
195    #[serde(default = "default_ollama_url")]
196    pub ollama_url: String,
197
198    /// Connection timeout in seconds
199    #[serde(default = "default_timeout")]
200    pub timeout_secs: u64,
201
202    /// Max idle connections per host
203    #[serde(default = "default_max_idle")]
204    pub max_idle_per_host: usize,
205}
206
207impl Default for BackendConfig {
208    fn default() -> Self {
209        Self {
210            ollama_url: default_ollama_url(),
211            timeout_secs: default_timeout(),
212            max_idle_per_host: default_max_idle(),
213        }
214    }
215}
216
217// Default value functions
218fn default_temperature() -> f32 {
219    DEFAULT_TEMPERATURE
220}
221
222fn default_max_tokens() -> usize {
223    DEFAULT_MAX_TOKENS
224}
225
/// Fallback for `BackendConfig::ollama_url`, used both by
/// `BackendConfig::default` and as the serde default for a missing field.
fn default_ollama_url() -> String {
    // Real callers always go through the `providers::factory::ProviderFactory`
    // path, which reads `app::Config.ollama.host/port` (the single documented
    // config path). This fallback therefore matters mainly when a
    // `BackendConfig` is built without an app config, which is primarily in
    // tests. It stays a static value so the precedence is unambiguous; a
    // `MERMAID_OLLAMA_HOST` env override would belong in `app::Config`
    // loading instead, where it can be documented and surfaced in
    // `mermaid status`.
    const LOCAL_OLLAMA: &str = "http://localhost:11434";
    LOCAL_OLLAMA.to_owned()
}
236
/// Fallback for `BackendConfig::timeout_secs`, in seconds.
fn default_timeout() -> u64 {
    const TIMEOUT_SECS: u64 = 10;
    TIMEOUT_SECS
}
240
/// Fallback for `BackendConfig::max_idle_per_host`.
fn default_max_idle() -> usize {
    const MAX_IDLE: usize = 10;
    MAX_IDLE
}
244
#[cfg(test)]
mod tests {
    use super::*;

    /// Step 4: `default_model.reasoning` from the app config must reach the
    /// per-call `ModelConfig`. If it did not, the level chosen in the
    /// config file would silently fall back to `Medium` on every session
    /// bootstrap.
    #[test]
    fn from_app_config_propagates_reasoning_from_settings() {
        let model_id = "ollama/qwen3-coder:30b";
        let mut app_config = crate::app::Config::default();
        app_config.default_model.reasoning = ReasoningLevel::High;

        let built = ModelConfig::from_app_config(&app_config, model_id);

        assert_eq!(built.reasoning, ReasoningLevel::High);
        assert_eq!(built.model, model_id);
    }

    /// An untouched app config resolves to the `Medium` reasoning level.
    #[test]
    fn from_app_config_uses_medium_default_when_unset() {
        let app_config = crate::app::Config::default();

        let built = ModelConfig::from_app_config(&app_config, "ollama/qwen3-coder:30b");

        assert_eq!(built.reasoning, ReasoningLevel::Medium);
    }

    /// Step 5b: a per-model preference beats the global default, so
    /// `/reasoning high` on Sonnet sticks for Sonnet and leaves other
    /// models alone.
    #[test]
    fn from_app_config_uses_per_model_preference() {
        let sonnet = "anthropic/claude-sonnet-4-6";
        let mut app_config = crate::app::Config::default();
        app_config.default_model.reasoning = ReasoningLevel::Low;
        app_config
            .reasoning_per_model
            .insert(sonnet.to_string(), ReasoningLevel::High);

        let with_preference = ModelConfig::from_app_config(&app_config, sonnet);
        assert_eq!(with_preference.reasoning, ReasoningLevel::High);

        // A model without its own entry gets the global default.
        let without_preference = ModelConfig::from_app_config(&app_config, "ollama/foo");
        assert_eq!(without_preference.reasoning, ReasoningLevel::Low);
    }
}
mermaid_cli/models/config.rs

mermaid_cli/models/
config.rs