// mermaid_cli/models/config.rs
1//! Unified configuration system for models and backends
2//!
3//! Replaces the fragmented app::Config + models::ModelConfig split
4//! with a single, coherent, backend-agnostic configuration structure.
5
6use crate::constants::{DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE};
7use crate::prompts;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
/// Unified model configuration
///
/// Single source of truth for sampling parameters, prompt overrides, and
/// per-backend tuning knobs; serde-(de)serializable so the whole struct can
/// round-trip through a config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
    /// Model identifier (provider/model or just model name)
    /// Examples: "ollama/qwen3-coder:30b", "qwen3-coder:30b", "gpt-4"
    // NOTE(review): no `#[serde(default)]` here, so `model` is required when
    // deserializing — confirm that is intended.
    pub model: String,

    /// Temperature (0.0-2.0, controls randomness)
    #[serde(default = "default_temperature")]
    pub temperature: f32,

    /// Maximum tokens to generate
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,

    /// Top-p sampling (0.0-1.0); `None` when absent from the config
    /// (serde deserializes missing `Option` fields to `None`).
    pub top_p: Option<f32>,

    /// Frequency penalty (-2.0 to 2.0); `None` when absent.
    pub frequency_penalty: Option<f32>,

    /// Presence penalty (-2.0 to 2.0); `None` when absent.
    pub presence_penalty: Option<f32>,

    /// System prompt override (None = use default)
    pub system_prompt: Option<String>,

    /// Enable thinking mode for models that support it (e.g., kimi, qwen3)
    /// Default: true (enabled)
    #[serde(default = "default_thinking_enabled")]
    pub thinking_enabled: bool,

    /// Backend-specific options (provider name -> key/value pairs)
    /// Example: {"ollama": {"num_gpu": "10", "num_ctx": "8192"}}
    /// Values are stored as strings; typed access goes through the
    /// `get_backend_option_*` helpers on this type.
    #[serde(default)]
    pub backend_options: HashMap<String, HashMap<String, String>>,
}
48
49impl Default for ModelConfig {
50    fn default() -> Self {
51        Self {
52            model: "ollama/tinyllama".to_string(),
53            temperature: default_temperature(),
54            max_tokens: default_max_tokens(),
55            top_p: Some(default_top_p()),
56            frequency_penalty: None,
57            presence_penalty: None,
58            system_prompt: Some(prompts::get_system_prompt()),
59            thinking_enabled: default_thinking_enabled(),
60            backend_options: HashMap::new(),
61        }
62    }
63}
64
65impl ModelConfig {
66    /// Get a backend-specific option
67    pub fn get_backend_option(&self, backend: &str, key: &str) -> Option<&String> {
68        self.backend_options.get(backend)?.get(key)
69    }
70
71    /// Get backend option as integer
72    pub fn get_backend_option_i32(&self, backend: &str, key: &str) -> Option<i32> {
73        self.get_backend_option(backend, key)?
74            .parse::<i32>()
75            .ok()
76    }
77
78    /// Get backend option as boolean
79    pub fn get_backend_option_bool(&self, backend: &str, key: &str) -> Option<bool> {
80        self.get_backend_option(backend, key)?
81            .parse::<bool>()
82            .ok()
83    }
84
85    /// Set a backend-specific option
86    pub fn set_backend_option(&mut self, backend: String, key: String, value: String) {
87        self.backend_options
88            .entry(backend)
89            .or_default()
90            .insert(key, value);
91    }
92
93    /// Extract Ollama-specific options
94    pub fn ollama_options(&self) -> OllamaOptions {
95        OllamaOptions {
96            num_gpu: self.get_backend_option_i32("ollama", "num_gpu"),
97            num_thread: self.get_backend_option_i32("ollama", "num_thread"),
98            num_ctx: self.get_backend_option_i32("ollama", "num_ctx"),
99            numa: self.get_backend_option_bool("ollama", "numa"),
100            cloud_api_key: self.get_backend_option("ollama", "cloud_api_key").cloned(),
101        }
102    }
103}
104
/// Ollama-specific options (extracted from backend_options)
///
/// Built by `ModelConfig::ollama_options`; each field is `None` unless the
/// corresponding `backend_options["ollama"]` entry exists and parses.
#[derive(Debug, Clone, Default)]
pub struct OllamaOptions {
    // Parsed from the "num_gpu" option key.
    pub num_gpu: Option<i32>,
    // Parsed from the "num_thread" option key.
    pub num_thread: Option<i32>,
    // Parsed from the "num_ctx" option key.
    pub num_ctx: Option<i32>,
    // Parsed from the "numa" option key.
    pub numa: Option<bool>,
    // Raw string from the "cloud_api_key" option key.
    pub cloud_api_key: Option<String>,
}
114
/// Backend connection configuration
///
/// Transport-level settings. Every field carries a serde default, so a
/// partial (or empty) config section deserializes cleanly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackendConfig {
    /// Ollama server URL (default: http://localhost:11434, overridable via
    /// the OLLAMA_HOST environment variable)
    #[serde(default = "default_ollama_url")]
    pub ollama_url: String,

    /// Connection timeout in seconds (default: 10)
    #[serde(default = "default_timeout")]
    pub timeout_secs: u64,

    /// Request timeout in seconds (default: 120)
    #[serde(default = "default_request_timeout")]
    pub request_timeout_secs: u64,

    /// Max idle connections per host (default: 10)
    #[serde(default = "default_max_idle")]
    pub max_idle_per_host: usize,

    /// Health check interval in seconds (default: 30)
    #[serde(default = "default_health_check_interval")]
    pub health_check_interval_secs: u64,
}
138
139impl Default for BackendConfig {
140    fn default() -> Self {
141        Self {
142            ollama_url: default_ollama_url(),
143            timeout_secs: default_timeout(),
144            request_timeout_secs: default_request_timeout(),
145            max_idle_per_host: default_max_idle(),
146            health_check_interval_secs: default_health_check_interval(),
147        }
148    }
149}
150
// Default value functions
//
// Free functions rather than consts because `#[serde(default = "...")]`
// requires a function path.

/// Serde default for `ModelConfig::temperature` (from crate::constants).
fn default_temperature() -> f32 {
    DEFAULT_TEMPERATURE
}

/// Serde default for `ModelConfig::max_tokens` (from crate::constants).
fn default_max_tokens() -> usize {
    DEFAULT_MAX_TOKENS
}

/// Default top-p used by `ModelConfig::default()` only — note `top_p` has
/// no serde default, so a config file omitting it gets `None` instead.
fn default_top_p() -> f32 {
    1.0
}
163
/// Serde default for `BackendConfig::ollama_url`.
///
/// Honors the conventional `OLLAMA_HOST` environment variable. Ollama
/// documents `OLLAMA_HOST` as a bare `host:port` (e.g. "0.0.0.0:11434"),
/// so a value without a scheme is normalized to `http://host:port`; values
/// that already start with `http://` or `https://` pass through unchanged.
/// Unset or empty values fall back to the local default server URL.
fn default_ollama_url() -> String {
    match std::env::var("OLLAMA_HOST") {
        Ok(host) if !host.trim().is_empty() => {
            let host = host.trim().to_string();
            if host.starts_with("http://") || host.starts_with("https://") {
                host
            } else {
                // Bare host:port — make it a URL downstream HTTP clients accept.
                format!("http://{host}")
            }
        }
        // Unset, non-UTF-8, or empty: local Ollama default.
        _ => "http://localhost:11434".to_string(),
    }
}
167
/// Serde default for `BackendConfig::timeout_secs` (connection timeout).
fn default_timeout() -> u64 {
    10
}

/// Serde default for `BackendConfig::request_timeout_secs`.
fn default_request_timeout() -> u64 {
    120
}

/// Serde default for `BackendConfig::max_idle_per_host`.
fn default_max_idle() -> usize {
    10
}

/// Serde default for `BackendConfig::health_check_interval_secs`.
fn default_health_check_interval() -> u64 {
    30
}

/// Serde default for `ModelConfig::thinking_enabled` — thinking mode on.
fn default_thinking_enabled() -> bool {
    true
}