mermaid_cli/models/
config.rs

//! Unified configuration system for models and backends
//!
//! Replaces the fragmented app::Config + models::ModelConfig split
//! with a single, coherent, backend-agnostic configuration structure.
5
6use crate::constants::{DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE};
7use crate::models::reasoning::ReasoningLevel;
8use crate::prompts;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12/// Unified model configuration
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct ModelConfig {
15    /// Model identifier (provider/model or just model name)
16    /// Examples: "ollama/qwen3-coder:30b", "qwen3-coder:30b", "gpt-4"
17    pub model: String,
18
19    /// Temperature (0.0-2.0, controls randomness)
20    #[serde(default = "default_temperature")]
21    pub temperature: f32,
22
23    /// Maximum tokens to generate
24    #[serde(default = "default_max_tokens")]
25    pub max_tokens: usize,
26
27    /// System prompt override (None = use default)
28    pub system_prompt: Option<String>,
29
30    /// Project-specific instructions appended to the system prompt
31    /// (Step 5h: MERMAID.md content). Runtime-only — never persisted.
32    /// On Anthropic, this gets its own `cache_control` block so the
33    /// static base stays cached even when the dynamic suffix changes.
34    /// On other adapters, it's concatenated onto the system prompt
35    /// with a `---` separator.
36    #[serde(skip)]
37    pub dynamic_system_suffix: Option<String>,
38
39    /// Requested reasoning depth. Adapters map this to provider-native
40    /// shapes via `nearest_effort()` against `ModelCapabilities
41    /// ::supports_reasoning`. Defaults to `Medium` — the OpenAI / Anthropic
42    /// / Gemini default and the level that produces useful chain-of-thought
43    /// without burning excessive latency for routine prompts.
44    #[serde(default)]
45    pub reasoning: ReasoningLevel,
46
47    /// Hide the reasoning trace from the user-visible stream while still
48    /// allowing the model to reason server-side. Maps to Ollama's
49    /// `--hidethinking` semantics and Anthropic's `thinking.display:
50    /// "omitted"`. Independent of `reasoning` (you can have full reasoning
51    /// depth with the trace hidden, or no reasoning at all — the two
52    /// concerns are orthogonal).
53    #[serde(default)]
54    pub hide_reasoning_trace: bool,
55
56    /// Whether this is a subagent context (excludes the agent tool to prevent nesting).
57    /// Runtime-only flag -- never persisted to disk.
58    #[serde(skip)]
59    pub is_subagent: bool,
60
61    /// Backend-specific options (provider name -> key/value pairs)
62    /// Example: {"ollama": {"num_gpu": "10", "num_ctx": "8192"}}
63    #[serde(default)]
64    pub backend_options: HashMap<String, HashMap<String, String>>,
65
66    /// MCP tool definitions in Ollama JSON format (runtime-only, never persisted).
67    /// Merged with built-in tools when sending requests to the model.
68    #[serde(skip)]
69    pub mcp_tools: Vec<serde_json::Value>,
70}
71
72impl Default for ModelConfig {
73    fn default() -> Self {
74        Self {
75            // Intentionally empty — every real construction goes through
76            // `from_app_config` which sets this immediately. Leaving a
77            // concrete model here (e.g. "ollama/tinyllama") would silently
78            // boot an unintended model if the default ever leaked to a
79            // call site; an empty string produces a clearer server error.
80            model: String::new(),
81            temperature: default_temperature(),
82            max_tokens: default_max_tokens(),
83            system_prompt: Some(prompts::get_system_prompt()),
84            dynamic_system_suffix: None,
85            reasoning: ReasoningLevel::default(),
86            hide_reasoning_trace: false,
87            is_subagent: false,
88            backend_options: HashMap::new(),
89            mcp_tools: Vec::new(),
90        }
91    }
92}
93
94impl ModelConfig {
95    /// Get a backend-specific option
96    pub fn get_backend_option(&self, backend: &str, key: &str) -> Option<&String> {
97        self.backend_options.get(backend)?.get(key)
98    }
99
100    /// Get backend option as integer
101    pub fn get_backend_option_i32(&self, backend: &str, key: &str) -> Option<i32> {
102        self.get_backend_option(backend, key)?.parse::<i32>().ok()
103    }
104
105    /// Get backend option as boolean
106    pub fn get_backend_option_bool(&self, backend: &str, key: &str) -> Option<bool> {
107        self.get_backend_option(backend, key)?.parse::<bool>().ok()
108    }
109
110    /// Set a backend-specific option
111    pub fn set_backend_option(&mut self, backend: String, key: String, value: String) {
112        self.backend_options
113            .entry(backend)
114            .or_default()
115            .insert(key, value);
116    }
117
118    /// Build the system-prompt string for adapters that don't support
119    /// per-block cache control (Gemini, OpenAI-compat, Ollama). Joins
120    /// the static base and the dynamic suffix (MERMAID.md content)
121    /// with a `---` separator. Anthropic's adapter doesn't use this
122    /// helper — it emits two separately-cached typed-text blocks.
123    ///
124    /// Returns `None` only when both fields are empty/unset.
125    pub fn combined_system_prompt(&self) -> Option<String> {
126        match (
127            self.system_prompt.as_deref(),
128            self.dynamic_system_suffix.as_deref(),
129        ) {
130            (Some(s), Some(suffix)) if !s.is_empty() && !suffix.is_empty() => {
131                Some(format!("{}\n\n---\n\n{}", s, suffix))
132            },
133            (Some(s), _) if !s.is_empty() => Some(s.to_string()),
134            (_, Some(suffix)) if !suffix.is_empty() => Some(suffix.to_string()),
135            _ => None,
136        }
137    }
138
139    /// Build a ModelConfig from user-facing app Config for a given model ID.
140    ///
141    /// Centralizes the wiring of temperature, max_tokens, reasoning level,
142    /// and Ollama hardware options that was previously scattered across
143    /// orchestrator.rs and model.rs.
144    ///
145    /// Reasoning resolution: per-model preference
146    /// (`config.reasoning_per_model[model_id]`) wins, then falls back to
147    /// the global `default_model.reasoning`. Set per-model via Alt+T or
148    /// `/reasoning <level>` while using the model in question.
149    pub fn from_app_config(config: &crate::app::Config, model_id: &str) -> Self {
150        let reasoning = config
151            .reasoning_per_model
152            .get(model_id)
153            .copied()
154            .unwrap_or(config.default_model.reasoning);
155        let mut mc = Self {
156            model: model_id.to_string(),
157            temperature: config.default_model.temperature,
158            max_tokens: config.default_model.max_tokens,
159            reasoning,
160            ..Self::default()
161        };
162        if let Some(v) = config.ollama.num_gpu {
163            mc.set_backend_option("ollama".into(), "num_gpu".into(), v.to_string());
164        }
165        if let Some(v) = config.ollama.num_ctx {
166            mc.set_backend_option("ollama".into(), "num_ctx".into(), v.to_string());
167        }
168        if let Some(v) = config.ollama.num_thread {
169            mc.set_backend_option("ollama".into(), "num_thread".into(), v.to_string());
170        }
171        if let Some(v) = config.ollama.numa {
172            mc.set_backend_option("ollama".into(), "numa".into(), v.to_string());
173        }
174        mc
175    }
176
177    /// Extract Ollama-specific options
178    pub fn ollama_options(&self) -> OllamaOptions {
179        OllamaOptions {
180            num_gpu: self.get_backend_option_i32("ollama", "num_gpu"),
181            num_thread: self.get_backend_option_i32("ollama", "num_thread"),
182            num_ctx: self.get_backend_option_i32("ollama", "num_ctx"),
183            numa: self.get_backend_option_bool("ollama", "numa"),
184        }
185    }
186}
187
/// Ollama-specific options (extracted from backend_options).
///
/// Every field is optional; `None` means the option was not configured
/// (or could not be parsed) and nothing should be sent for it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OllamaOptions {
    /// Value of the `num_gpu` backend option, if set.
    pub num_gpu: Option<i32>,
    /// Value of the `num_thread` backend option, if set.
    pub num_thread: Option<i32>,
    /// Value of the `num_ctx` backend option, if set.
    pub num_ctx: Option<i32>,
    /// Value of the `numa` backend option, if set.
    pub numa: Option<bool>,
}
196
197/// Backend connection configuration
198#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct BackendConfig {
200    /// Ollama server URL (default: http://localhost:11434)
201    #[serde(default = "default_ollama_url")]
202    pub ollama_url: String,
203
204    /// Connection timeout in seconds
205    #[serde(default = "default_timeout")]
206    pub timeout_secs: u64,
207
208    /// Max idle connections per host
209    #[serde(default = "default_max_idle")]
210    pub max_idle_per_host: usize,
211}
212
213impl Default for BackendConfig {
214    fn default() -> Self {
215        Self {
216            ollama_url: default_ollama_url(),
217            timeout_secs: default_timeout(),
218            max_idle_per_host: default_max_idle(),
219        }
220    }
221}
222
223// Default value functions
224fn default_temperature() -> f32 {
225    DEFAULT_TEMPERATURE
226}
227
228fn default_max_tokens() -> usize {
229    DEFAULT_MAX_TOKENS
230}
231
/// Serde default for `BackendConfig::ollama_url`: the local Ollama
/// endpoint on port 11434.
fn default_ollama_url() -> String {
    // Real callers always go through `ModelFactory::config_to_backend_config`,
    // which reads `app::Config.ollama.host/port` (the single documented config
    // path). This default only fires when constructing `BackendConfig::default`
    // directly (no app config supplied) — primarily tests. Keep it static so
    // the precedence is unambiguous; a `MERMAID_OLLAMA_HOST` env override
    // would belong on `app::Config` loading instead, where it can be
    // documented and surfaced in `mermaid status`.
    "http://localhost:11434".to_string()
}
242
/// Serde default for `BackendConfig::timeout_secs`: 10 seconds.
fn default_timeout() -> u64 {
    10
}
246
/// Serde default for `BackendConfig::max_idle_per_host`: 10 connections.
fn default_max_idle() -> usize {
    10
}
250
#[cfg(test)]
mod tests {
    use super::*;

    /// Step 4 wires `default_model.reasoning` from app config into the
    /// per-call ModelConfig. Without this, the user's config-file choice
    /// would silently revert to `Medium` on every session bootstrap.
    #[test]
    fn from_app_config_propagates_reasoning_from_settings() {
        let mut cfg = crate::app::Config::default();
        cfg.default_model.reasoning = ReasoningLevel::High;

        let mc = ModelConfig::from_app_config(&cfg, "ollama/qwen3-coder:30b");
        assert_eq!(mc.reasoning, ReasoningLevel::High);
        assert_eq!(mc.model, "ollama/qwen3-coder:30b");
    }

    /// With nothing overridden, the resolved reasoning level is `Medium`.
    #[test]
    fn from_app_config_uses_medium_default_when_unset() {
        let cfg = crate::app::Config::default();
        let mc = ModelConfig::from_app_config(&cfg, "ollama/qwen3-coder:30b");
        assert_eq!(mc.reasoning, ReasoningLevel::Medium);
    }

    /// Per-model preference wins over the global default. This is the
    /// Step 5b semantic: setting `/reasoning high` on Sonnet sticks for
    /// Sonnet without affecting other models.
    #[test]
    fn from_app_config_uses_per_model_preference() {
        let mut cfg = crate::app::Config::default();
        cfg.default_model.reasoning = ReasoningLevel::Low;
        cfg.reasoning_per_model.insert(
            "anthropic/claude-sonnet-4-6".to_string(),
            ReasoningLevel::High,
        );

        let mc_per_model = ModelConfig::from_app_config(&cfg, "anthropic/claude-sonnet-4-6");
        assert_eq!(mc_per_model.reasoning, ReasoningLevel::High);

        // Falls back to default for other models.
        let mc_default = ModelConfig::from_app_config(&cfg, "ollama/foo");
        assert_eq!(mc_default.reasoning, ReasoningLevel::Low);
    }
}
mermaid_cli/models/config.rs

mermaid_cli/models/
config.rs