// mermaid_cli/models/config.rs
//! Unified configuration system for models and backends.
//!
//! Replaces the fragmented `app::Config` + `models::ModelConfig` split
//! with a single, coherent, backend-agnostic configuration structure.

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::prompts;

10/// Unified model configuration
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct ModelConfig {
13    /// Model identifier (provider/model or just model name)
14    /// Examples: "ollama/qwen3-coder:30b", "qwen3-coder:30b", "gpt-4"
15    pub model: String,
16
17    /// Temperature (0.0-2.0, controls randomness)
18    #[serde(default = "default_temperature")]
19    pub temperature: f32,
20
21    /// Maximum tokens to generate
22    #[serde(default = "default_max_tokens")]
23    pub max_tokens: usize,
24
25    /// Top-p sampling (0.0-1.0)
26    pub top_p: Option<f32>,
27
28    /// Frequency penalty (-2.0 to 2.0)
29    pub frequency_penalty: Option<f32>,
30
31    /// Presence penalty (-2.0 to 2.0)
32    pub presence_penalty: Option<f32>,
33
34    /// System prompt override (None = use default)
35    pub system_prompt: Option<String>,
36
37    /// Enable thinking mode for models that support it (e.g., kimi, qwen3)
38    /// Default: true (enabled)
39    #[serde(default = "default_thinking_enabled")]
40    pub thinking_enabled: bool,
41
42    /// Backend-specific options (provider name -> key/value pairs)
43    /// Example: {"ollama": {"num_gpu": "10", "num_ctx": "8192"}}
44    #[serde(default)]
45    pub backend_options: HashMap<String, HashMap<String, String>>,
46}
47
48impl Default for ModelConfig {
49    fn default() -> Self {
50        Self {
51            model: "ollama/tinyllama".to_string(),
52            temperature: default_temperature(),
53            max_tokens: default_max_tokens(),
54            top_p: Some(default_top_p()),
55            frequency_penalty: None,
56            presence_penalty: None,
57            system_prompt: Some(prompts::get_system_prompt()),
58            thinking_enabled: default_thinking_enabled(),
59            backend_options: HashMap::new(),
60        }
61    }
62}
63
64impl ModelConfig {
65    /// Get a backend-specific option
66    pub fn get_backend_option(&self, backend: &str, key: &str) -> Option<&String> {
67        self.backend_options.get(backend)?.get(key)
68    }
69
70    /// Get backend option as integer
71    pub fn get_backend_option_i32(&self, backend: &str, key: &str) -> Option<i32> {
72        self.get_backend_option(backend, key)?
73            .parse::<i32>()
74            .ok()
75    }
76
77    /// Get backend option as boolean
78    pub fn get_backend_option_bool(&self, backend: &str, key: &str) -> Option<bool> {
79        self.get_backend_option(backend, key)?
80            .parse::<bool>()
81            .ok()
82    }
83
84    /// Set a backend-specific option
85    pub fn set_backend_option(&mut self, backend: String, key: String, value: String) {
86        self.backend_options
87            .entry(backend)
88            .or_insert_with(HashMap::new)
89            .insert(key, value);
90    }
91
92    /// Extract Ollama-specific options
93    pub fn ollama_options(&self) -> OllamaOptions {
94        OllamaOptions {
95            num_gpu: self.get_backend_option_i32("ollama", "num_gpu"),
96            num_thread: self.get_backend_option_i32("ollama", "num_thread"),
97            num_ctx: self.get_backend_option_i32("ollama", "num_ctx"),
98            numa: self.get_backend_option_bool("ollama", "numa"),
99            cloud_api_key: self.get_backend_option("ollama", "cloud_api_key").cloned(),
100        }
101    }
102}
103
/// Typed view of the well-known Ollama options, extracted from
/// `ModelConfig::backend_options` by `ollama_options()`. Each field
/// mirrors the backend option key of the same name.
#[derive(Debug, Clone, Default)]
pub struct OllamaOptions {
    pub num_gpu: Option<i32>,
    pub num_thread: Option<i32>,
    pub num_ctx: Option<i32>,
    pub numa: Option<bool>,
    pub cloud_api_key: Option<String>,
}
113
114/// Backend connection configuration
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct BackendConfig {
117    /// Ollama server URL (default: http://localhost:11434)
118    #[serde(default = "default_ollama_url")]
119    pub ollama_url: String,
120
121    /// Connection timeout in seconds
122    #[serde(default = "default_timeout")]
123    pub timeout_secs: u64,
124
125    /// Request timeout in seconds
126    #[serde(default = "default_request_timeout")]
127    pub request_timeout_secs: u64,
128
129    /// Max idle connections per host
130    #[serde(default = "default_max_idle")]
131    pub max_idle_per_host: usize,
132
133    /// Health check interval in seconds
134    #[serde(default = "default_health_check_interval")]
135    pub health_check_interval_secs: u64,
136}
137
138impl Default for BackendConfig {
139    fn default() -> Self {
140        Self {
141            ollama_url: default_ollama_url(),
142            timeout_secs: default_timeout(),
143            request_timeout_secs: default_request_timeout(),
144            max_idle_per_host: default_max_idle(),
145            health_check_interval_secs: default_health_check_interval(),
146        }
147    }
148}
149
// Default value helpers. These are free functions because
// `#[serde(default = "...")]` takes a path to a nullary function.

// --- Model sampling defaults ---

fn default_temperature() -> f32 {
    0.7
}

fn default_top_p() -> f32 {
    1.0
}

fn default_max_tokens() -> usize {
    4096
}

fn default_thinking_enabled() -> bool {
    true
}

// --- Backend connection defaults ---

/// Honors the `OLLAMA_HOST` environment variable when set; otherwise
/// falls back to the standard local Ollama address.
fn default_ollama_url() -> String {
    match std::env::var("OLLAMA_HOST") {
        Ok(url) => url,
        Err(_) => String::from("http://localhost:11434"),
    }
}

fn default_timeout() -> u64 {
    10
}

fn default_request_timeout() -> u64 {
    120
}

fn default_max_idle() -> usize {
    10
}

fn default_health_check_interval() -> u64 {
    30
}