Skip to main content

mermaid_cli/models/
config.rs

//! Unified configuration system for models and backends.
//!
//! Replaces the fragmented `app::Config` + `models::ModelConfig` split
//! with a single, coherent, backend-agnostic configuration structure.
5
6use crate::constants::{DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE};
7use crate::prompts;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11/// Unified model configuration
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ModelConfig {
14    /// Model identifier (provider/model or just model name)
15    /// Examples: "ollama/qwen3-coder:30b", "qwen3-coder:30b", "gpt-4"
16    pub model: String,
17
18    /// Temperature (0.0-2.0, controls randomness)
19    #[serde(default = "default_temperature")]
20    pub temperature: f32,
21
22    /// Maximum tokens to generate
23    #[serde(default = "default_max_tokens")]
24    pub max_tokens: usize,
25
26    /// System prompt override (None = use default)
27    pub system_prompt: Option<String>,
28
29    /// Enable thinking mode for models that support it (e.g., kimi, qwen3)
30    /// Some(true) = explicitly enabled, Some(false) = explicitly disabled, None = model default
31    #[serde(default = "default_thinking_enabled")]
32    pub thinking_enabled: Option<bool>,
33
34    /// Whether this is a subagent context (excludes the agent tool to prevent nesting).
35    /// Runtime-only flag -- never persisted to disk.
36    #[serde(skip)]
37    pub is_subagent: bool,
38
39    /// Backend-specific options (provider name -> key/value pairs)
40    /// Example: {"ollama": {"num_gpu": "10", "num_ctx": "8192"}}
41    #[serde(default)]
42    pub backend_options: HashMap<String, HashMap<String, String>>,
43
44    /// MCP tool definitions in Ollama JSON format (runtime-only, never persisted).
45    /// Merged with built-in tools when sending requests to the model.
46    #[serde(skip)]
47    pub mcp_tools: Vec<serde_json::Value>,
48}
49
50impl Default for ModelConfig {
51    fn default() -> Self {
52        Self {
53            model: "ollama/tinyllama".to_string(),
54            temperature: default_temperature(),
55            max_tokens: default_max_tokens(),
56            system_prompt: Some(prompts::get_system_prompt()),
57            thinking_enabled: Some(true),
58            is_subagent: false,
59            backend_options: HashMap::new(),
60            mcp_tools: Vec::new(),
61        }
62    }
63}
64
65impl ModelConfig {
66    /// Get a backend-specific option
67    pub fn get_backend_option(&self, backend: &str, key: &str) -> Option<&String> {
68        self.backend_options.get(backend)?.get(key)
69    }
70
71    /// Get backend option as integer
72    pub fn get_backend_option_i32(&self, backend: &str, key: &str) -> Option<i32> {
73        self.get_backend_option(backend, key)?.parse::<i32>().ok()
74    }
75
76    /// Get backend option as boolean
77    pub fn get_backend_option_bool(&self, backend: &str, key: &str) -> Option<bool> {
78        self.get_backend_option(backend, key)?.parse::<bool>().ok()
79    }
80
81    /// Set a backend-specific option
82    pub fn set_backend_option(&mut self, backend: String, key: String, value: String) {
83        self.backend_options
84            .entry(backend)
85            .or_default()
86            .insert(key, value);
87    }
88
89    /// Build a ModelConfig from user-facing app Config for a given model ID.
90    ///
91    /// Centralizes the wiring of temperature, max_tokens, and Ollama hardware
92    /// options that was previously scattered across orchestrator.rs and model.rs.
93    pub fn from_app_config(config: &crate::app::Config, model_id: &str) -> Self {
94        let mut mc = Self {
95            model: model_id.to_string(),
96            temperature: config.default_model.temperature,
97            max_tokens: config.default_model.max_tokens,
98            ..Self::default()
99        };
100        if let Some(v) = config.ollama.num_gpu {
101            mc.set_backend_option("ollama".into(), "num_gpu".into(), v.to_string());
102        }
103        if let Some(v) = config.ollama.num_ctx {
104            mc.set_backend_option("ollama".into(), "num_ctx".into(), v.to_string());
105        }
106        if let Some(v) = config.ollama.num_thread {
107            mc.set_backend_option("ollama".into(), "num_thread".into(), v.to_string());
108        }
109        if let Some(v) = config.ollama.numa {
110            mc.set_backend_option("ollama".into(), "numa".into(), v.to_string());
111        }
112        mc
113    }
114
115    /// Extract Ollama-specific options
116    pub fn ollama_options(&self) -> OllamaOptions {
117        OllamaOptions {
118            num_gpu: self.get_backend_option_i32("ollama", "num_gpu"),
119            num_thread: self.get_backend_option_i32("ollama", "num_thread"),
120            num_ctx: self.get_backend_option_i32("ollama", "num_ctx"),
121            numa: self.get_backend_option_bool("ollama", "numa"),
122        }
123    }
124}
125
/// Typed view of the Ollama entries found in `backend_options`.
///
/// Every field is optional: `None` means the option was not set or could not
/// be parsed into the field's type.
#[derive(Clone, Debug, Default)]
pub struct OllamaOptions {
    /// Parsed value of the `num_gpu` option, if present.
    pub num_gpu: Option<i32>,
    /// Parsed value of the `num_thread` option, if present.
    pub num_thread: Option<i32>,
    /// Parsed value of the `num_ctx` option, if present.
    pub num_ctx: Option<i32>,
    /// Parsed value of the `numa` option, if present.
    pub numa: Option<bool>,
}
134
135/// Backend connection configuration
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct BackendConfig {
138    /// Ollama server URL (default: http://localhost:11434)
139    #[serde(default = "default_ollama_url")]
140    pub ollama_url: String,
141
142    /// Connection timeout in seconds
143    #[serde(default = "default_timeout")]
144    pub timeout_secs: u64,
145
146    /// Max idle connections per host
147    #[serde(default = "default_max_idle")]
148    pub max_idle_per_host: usize,
149}
150
151impl Default for BackendConfig {
152    fn default() -> Self {
153        Self {
154            ollama_url: default_ollama_url(),
155            timeout_secs: default_timeout(),
156            max_idle_per_host: default_max_idle(),
157        }
158    }
159}
160
161// Default value functions
162fn default_temperature() -> f32 {
163    DEFAULT_TEMPERATURE
164}
165
166fn default_max_tokens() -> usize {
167    DEFAULT_MAX_TOKENS
168}
169
/// Resolve the Ollama server URL to use when none is configured.
///
/// Reads the `OLLAMA_HOST` environment variable and falls back to
/// `http://localhost:11434` when it is unset, not valid Unicode, or blank.
///
/// `OLLAMA_HOST` is often set without a scheme (for example
/// `127.0.0.1:11434`); such values are prefixed with `http://` so the result
/// can always be used as a base URL. Values that already contain a scheme are
/// returned with surrounding whitespace trimmed and are otherwise unchanged.
fn default_ollama_url() -> String {
    const FALLBACK_URL: &str = "http://localhost:11434";

    match std::env::var("OLLAMA_HOST") {
        Ok(raw) => {
            let host = raw.trim();
            if host.is_empty() {
                // An exported-but-empty variable must not yield an empty URL.
                FALLBACK_URL.to_string()
            } else if host.contains("://") {
                host.to_string()
            } else {
                // Bare `host:port` form: assume plain HTTP.
                format!("http://{}", host)
            }
        }
        Err(_) => FALLBACK_URL.to_string(),
    }
}
173
/// Serde default for `BackendConfig::timeout_secs`: 10 seconds.
fn default_timeout() -> u64 {
    const TIMEOUT_SECS: u64 = 10;
    TIMEOUT_SECS
}
177
/// Serde default for `BackendConfig::max_idle_per_host`: 10 connections.
fn default_max_idle() -> usize {
    const MAX_IDLE_PER_HOST: usize = 10;
    MAX_IDLE_PER_HOST
}
181
/// Serde default for `ModelConfig::thinking_enabled`.
///
/// A missing field is treated as explicitly enabled (`Some(true)`), not `None`.
fn default_thinking_enabled() -> Option<bool> {
    const ENABLED_BY_DEFAULT: Option<bool> = Some(true);
    ENABLED_BY_DEFAULT
}