Skip to main content

llm_manager/
config.rs

1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12pub use model_config::ModelConfigStore;
13
14pub use profiles::ProfileStore;
15
16use crate::models::{
17    Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
18};
19pub use presets::PresetStore;
20
21/// Resolve the base config directory with a safe fallback chain.
22///
23/// Prefers `dirs::config_dir()` (XDG on Linux, ~/Library/Application Support on macOS,
24/// etc.), falls back to `~/.config`, and lastly `./.llm-manager` if both fail.
25pub fn config_base_dir() -> PathBuf {
26    if let Some(d) = dirs::config_dir() {
27        return d;
28    }
29    if let Some(home) = dirs::home_dir() {
30        return home.join(".config");
31    }
32    PathBuf::from(".").join(".llm-manager")
33}
34
/// Count physical CPU cores (hyperthreads sharing a core are counted once).
///
/// Parses `/proc/cpuinfo` and counts the distinct (`physical id`, `core id`)
/// pairs it lists. When the file cannot be read, or it lists no such pair
/// (the keys are absent on many ARM boards and virtual machines), the result
/// falls back to [`std::thread::available_parallelism`] — which reports
/// logical CPUs — and finally to 1.
///
/// The returned value is never 0.
pub fn physical_cores() -> u32 {
    let from_cpuinfo = std::fs::read_to_string("/proc/cpuinfo")
        .map(|content| count_cpuinfo_cores(&content))
        .unwrap_or(0);
    if from_cpuinfo > 0 {
        return u32::try_from(from_cpuinfo).unwrap_or(u32::MAX);
    }
    std::thread::available_parallelism()
        .map(|p| u32::try_from(p.get()).unwrap_or(u32::MAX))
        .unwrap_or(1)
}

/// Count distinct (`physical id`, `core id`) pairs in `/proc/cpuinfo`-style text.
fn count_cpuinfo_cores(cpuinfo: &str) -> usize {
    let mut seen = HashSet::new();
    let mut phys: Option<&str> = None;
    let mut core: Option<&str> = None;

    for line in cpuinfo.lines() {
        let field = line.split_once(':').map(|(k, v)| (k.trim(), v.trim()));

        // A blank line or a `processor` key marks a stanza boundary. Record the
        // finished stanza and reset, so ids never leak from one logical CPU
        // into the next.
        let boundary = line.trim().is_empty() || matches!(field, Some(("processor", _)));
        if boundary {
            if let (Some(p), Some(c)) = (phys.take(), core.take()) {
                seen.insert((p, c));
            }
        }

        match field {
            Some(("physical id", v)) => phys = Some(v),
            Some(("core id", v)) => core = Some(v),
            _ => {}
        }
    }

    // The last stanza may not be followed by a blank line.
    if let (Some(p), Some(c)) = (phys, core) {
        seen.insert((p, c));
    }
    seen.len()
}
65
66/// A remote RPC worker for distributed inference.
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct RpcWorker {
69    #[serde(default)]
70    pub selected: bool,
71    #[serde(default)]
72    pub name: String,
73    pub ip: String,
74    #[serde(default = "default_rpc_port")]
75    pub port: u16,
76}
77
/// Serde default for `RpcWorker::port`.
fn default_rpc_port() -> u16 {
    const RPC_PORT: u16 = 50052;
    RPC_PORT
}
81
82/// WebSocket dashboard server configuration.
83#[derive(Debug, Clone, Serialize, Deserialize, Default)]
84pub struct WsServer {
85    #[serde(default)]
86    pub enabled: bool,
87    #[serde(default = "default_ws_port")]
88    pub port: u16,
89    #[serde(default)]
90    pub auth_key: Option<String>,
91    #[serde(default = "default_ws_host")]
92    pub host: String,
93    #[serde(default)]
94    pub tls_enabled: bool,
95    #[serde(default)]
96    pub tls_cert: Option<String>,
97    #[serde(default)]
98    pub tls_key: Option<String>,
99}
100
/// Serde default for `WsServer::host`.
fn default_ws_host() -> String {
    String::from("0.0.0.0")
}
104
/// Serde default for `WsServer::port`.
fn default_ws_port() -> u16 {
    const WS_PORT: u16 = 49223;
    WS_PORT
}
108
109/// Global configuration.
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct Config {
112    pub models_dirs: Vec<PathBuf>,
113    pub llama_server: PathBuf,
114    pub default: DefaultParams,
115    /// Per-model overrides (keyed by model file name, stored as YAML in models/).
116    #[serde(default, skip)]
117    pub model_overrides: ModelConfigStore,
118    /// Named profiles of settings presets (stored as YAML in profiles/).
119    #[serde(default, skip)]
120    pub profiles: ProfileStore,
121    /// System prompt presets (stored as YAML in presets/).
122    #[serde(default, skip)]
123    pub system_prompt_presets: PresetStore,
124    /// RPC Workers for distributed inference.
125    #[serde(default)]
126    pub rpc_workers: Vec<RpcWorker>,
127    /// WebSocket dashboard server for live metrics.
128    #[serde(default)]
129    pub ws_server: WsServer,
130    /// Number of results per HuggingFace search query.
131    #[serde(default = "default_search_limit")]
132    pub search_limit: u32,
133}
134
/// Serde default for `Config::search_limit`.
fn default_search_limit() -> u32 {
    const SEARCH_LIMIT: u32 = 50;
    SEARCH_LIMIT
}
138
139/// A named profile of settings.
140#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
141pub struct Profile {
142    pub name: String,
143    /// Brief description shown in the profile list.
144    pub description: String,
145    /// The settings for this profile.
146    #[serde(default)]
147    pub settings: ModelOverride,
148}
149
150impl Profile {
151    /// Apply this profile's settings to a base ModelSettings.
152    pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
153        self.settings.apply(&mut base);
154        base
155    }
156}
157
158/// A named system prompt preset.
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct SystemPromptPreset {
161    pub name: String,
162    pub description: String,
163    pub content: String,
164}
165
166/// Built-in system prompt presets.
167pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
168    vec![
169        SystemPromptPreset {
170            name: "General".into(),
171            description: "General-purpose assistant".into(),
172            content: "You are a helpful assistant.".into(),
173        },
174        SystemPromptPreset {
175            name: "Coder".into(),
176            description: "Expert software developer".into(),
177            content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
178        },
179        SystemPromptPreset {
180            name: "Thinker".into(),
181            description: "Analytical and thoughtful".into(),
182            content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
183        },
184        SystemPromptPreset {
185            name: "Mathematician".into(),
186            description: "Expert in mathematics".into(),
187            content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
188        },
189    ]
190}
191
192#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
193pub struct ModelOverride {
194    // Loading
195    pub context_length: Option<u32>,
196    pub batch_size: Option<u32>,
197    pub ubatch_size: Option<u32>,
198    pub cache_type_k: Option<CacheTypeK>,
199    pub cache_type_v: Option<CacheTypeV>,
200    pub keep: Option<i32>,
201    pub swa_full: Option<bool>,
202    pub mlock: Option<bool>,
203    pub mmap: Option<bool>,
204    pub numa: Option<NumMode>,
205    pub uniform_cache: Option<bool>,
206    pub system_prompt: Option<String>,
207    pub system_prompt_preset_name: Option<String>,
208    pub max_concurrent_predictions: Option<u32>,
209    pub threads: Option<u32>,
210    pub threads_batch: Option<u32>,
211    pub parallel: Option<u32>,
212
213    // GPU
214    pub gpu_layers: Option<i32>,
215    pub split_mode: Option<SplitMode>,
216    pub tensor_split: Option<String>,
217    pub main_gpu: Option<i32>,
218    pub fit: Option<bool>,
219    pub lora: Option<PathBuf>,
220    pub lora_scaled: Option<(PathBuf, f32)>,
221    pub rpc: Option<String>,
222    pub embedding: Option<bool>,
223    pub kv_cache_offload: Option<bool>,
224    pub flash_attn: Option<bool>,
225    pub jinja: Option<bool>,
226    pub chat_template: Option<String>,
227    pub chat_template_kwargs: Option<String>,
228    pub expert_count: Option<i32>,
229    pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,
230
231    // Sampling
232    pub seed: Option<i32>,
233    pub temperature: Option<f32>,
234    pub top_k: Option<i32>,
235    pub top_p: Option<f32>,
236    pub min_p: Option<f32>,
237    pub typical_p: Option<f32>,
238    pub mirostat: Option<Mirostat>,
239    pub mirostat_lr: Option<f32>,
240    pub mirostat_ent: Option<f32>,
241    pub ignore_eos: Option<bool>,
242    pub samplers: Option<Samplers>,
243
244    // Repetition
245    pub repeat_penalty: Option<f32>,
246    pub repeat_last_n: Option<i32>,
247    pub presence_penalty: Option<f32>,
248    pub frequency_penalty: Option<f32>,
249    pub dry_multiplier: Option<f32>,
250    pub dry_base: Option<f32>,
251    pub dry_allowed_length: Option<i32>,
252    pub dry_penalty_last_n: Option<i32>,
253
254    // RoPE
255    pub rope_scaling: Option<RopeScaling>,
256    pub rope_scale: Option<f32>,
257    pub rope_freq_base: Option<f32>,
258    pub rope_freq_scale: Option<f32>,
259    pub rope_yarn_enabled: Option<bool>,
260
261    // Server
262    pub cache_prompt: Option<bool>,
263    pub cache_reuse: Option<u32>,
264    pub webui: Option<bool>,
265    pub ws_server_enabled: Option<bool>,
266    pub ws_server_port: Option<u16>,
267    pub ws_server_auth_key: Option<String>,
268    pub ws_server_tls_enabled: Option<bool>,
269    pub ws_server_tls_cert: Option<String>,
270    pub ws_server_tls_key: Option<String>,
271
272    // Other
273    pub max_tokens: Option<u32>,
274    pub cache_type: Option<CacheType>,
275    pub llama_cpp_version_cpu: Option<String>,
276    pub llama_cpp_version_vulkan: Option<String>,
277    pub llama_cpp_version_rocm: Option<String>,
278    pub llama_cpp_version_rocm_lemonade: Option<String>,
279    pub llama_cpp_version_cuda: Option<String>,
280    pub spec_type: Option<String>,
281    pub draft_tokens: Option<u32>,
282    pub tags: Option<Vec<String>>,
283}
284
/// For each listed `Copy` field: when the override holds `Some(value)`, store
/// `value` in the base; when it holds `None`, leave the base untouched.
macro_rules! apply_scalar {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field {
                $base.$field = value;
            }
        )+
    };
}
293
/// For each listed field (override `Option<T>`, base `T: Clone`): when the
/// override holds a value, clone it into the base; otherwise do nothing.
macro_rules! apply_clone {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            match $self.$field.as_ref() {
                Some(value) => $base.$field = value.clone(),
                None => {}
            }
        )+
    };
}
304
/// For each listed field (`Option<T>` on both sides): when the override is
/// `Some`, the base receives a clone of it; a `None` override leaves the base
/// value as it was.
macro_rules! apply_option {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if $self.$field.is_some() {
                $base.$field = $self.$field.clone();
            }
        )+
    };
}
315
316impl ModelOverride {
317    pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
318        Self {
319            context_length: Some(s.context_length),
320            batch_size: Some(s.batch_size),
321            ubatch_size: Some(s.ubatch_size),
322            cache_type_k: s.cache_type_k,
323            cache_type_v: s.cache_type_v,
324            keep: Some(s.keep),
325            swa_full: Some(s.swa_full),
326            mlock: Some(s.mlock),
327            mmap: Some(s.mmap),
328            numa: Some(s.numa),
329            uniform_cache: Some(s.uniform_cache),
330            system_prompt: Some(s.system_prompt.clone()),
331            system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
332            max_concurrent_predictions: s.max_concurrent_predictions,
333            threads: Some(s.threads),
334            threads_batch: Some(s.threads_batch),
335            parallel: Some(s.parallel),
336            gpu_layers: Some(match s.gpu_layers_mode {
337                crate::models::GpuLayersMode::Auto => 0,
338                crate::models::GpuLayersMode::Specific(n) => n as i32,
339                crate::models::GpuLayersMode::All => -1,
340            }),
341            gpu_layers_mode: Some(s.gpu_layers_mode),
342            split_mode: Some(s.split_mode),
343            tensor_split: Some(s.tensor_split.clone()),
344            main_gpu: Some(s.main_gpu),
345            fit: Some(s.fit),
346            lora: s.lora.clone(),
347            lora_scaled: s.lora_scaled.clone(),
348            rpc: Some(s.rpc.clone()),
349            embedding: Some(s.embedding),
350            kv_cache_offload: Some(s.kv_cache_offload),
351            flash_attn: Some(s.flash_attn),
352            jinja: Some(s.jinja),
353            chat_template: s.chat_template.clone(),
354            chat_template_kwargs: s.chat_template_kwargs.clone(),
355            expert_count: Some(s.expert_count),
356            seed: Some(s.seed),
357            temperature: Some(s.temperature),
358            top_k: Some(s.top_k),
359            top_p: Some(s.top_p),
360            min_p: Some(s.min_p),
361            typical_p: Some(s.typical_p),
362            mirostat: Some(s.mirostat),
363            mirostat_lr: Some(s.mirostat_lr),
364            mirostat_ent: Some(s.mirostat_ent),
365            ignore_eos: Some(s.ignore_eos),
366            samplers: Some(s.samplers.clone()),
367            repeat_penalty: Some(s.repeat_penalty),
368            repeat_last_n: Some(s.repeat_last_n),
369            presence_penalty: s.presence_penalty,
370            frequency_penalty: s.frequency_penalty,
371            dry_multiplier: Some(s.dry_multiplier),
372            dry_base: Some(s.dry_base),
373            dry_allowed_length: Some(s.dry_allowed_length),
374            dry_penalty_last_n: Some(s.dry_penalty_last_n),
375            rope_scaling: Some(s.rope_scaling),
376            rope_scale: Some(s.rope_scale),
377            rope_freq_base: Some(s.rope_freq_base),
378            rope_freq_scale: Some(s.rope_freq_scale),
379            rope_yarn_enabled: Some(s.rope_yarn_enabled),
380            cache_prompt: Some(s.cache_prompt),
381            cache_reuse: Some(s.cache_reuse),
382            webui: Some(s.webui),
383            max_tokens: s.max_tokens,
384            cache_type: Some(s.cache_type),
385            llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
386            llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
387            llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
388            llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
389            llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
390            spec_type: Some(s.spec_type.clone()),
391            draft_tokens: Some(s.draft_tokens),
392            tags: Some(s.tags.clone()),
393            ws_server_enabled: Some(s.ws_server_enabled),
394            ws_server_port: Some(s.ws_server_port),
395            ws_server_auth_key: s.ws_server_auth_key.clone(),
396            ws_server_tls_enabled: Some(s.ws_server_tls_enabled),
397            ws_server_tls_cert: s.ws_server_tls_cert.clone(),
398            ws_server_tls_key: s.ws_server_tls_key.clone(),
399        }
400    }
401
402    /// Merge override into a base ModelSettings (in-place).
403    pub fn apply(&self, base: &mut crate::models::ModelSettings) {
404        // Override values always take precedence. For Option<T> fields,
405        // the override value (even None) is explicitly set by the user.
406
407        // Scalar Copy fields: base.f = self.f.unwrap_or(base.f)
408        apply_scalar!(self, base,
409            context_length, batch_size, ubatch_size, keep, swa_full, mlock, mmap,
410            numa, uniform_cache, kv_cache_offload, threads, threads_batch, parallel,
411            split_mode, main_gpu, fit, embedding, flash_attn, jinja, expert_count,
412            seed, temperature, top_k, top_p, min_p, typical_p,
413            mirostat, mirostat_lr, mirostat_ent, ignore_eos,
414            repeat_penalty, repeat_last_n,
415            dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
416            rope_scaling, rope_scale, rope_freq_base, rope_freq_scale, rope_yarn_enabled,
417            cache_prompt, cache_reuse, webui, cache_type,
418            ws_server_enabled, ws_server_port, ws_server_tls_enabled,
419            draft_tokens, gpu_layers_mode,
420        );
421
422        // Cloneable fields: if let Some(v) = &self.f { base.f = v.clone(); }
423        apply_clone!(self, base,
424            system_prompt, system_prompt_preset_name, tensor_split, rpc,
425            samplers, spec_type, tags,
426        );
427
428        // Option<T> fields: if let Some(v) = &self.f { base.f = Some(v.clone()); }
429        apply_option!(self, base,
430            lora, lora_scaled, chat_template, chat_template_kwargs,
431            llama_cpp_version_cpu, llama_cpp_version_vulkan,
432            llama_cpp_version_rocm, llama_cpp_version_rocm_lemonade,
433            llama_cpp_version_cuda,
434            ws_server_auth_key, ws_server_tls_cert, ws_server_tls_key,
435        );
436
437        // Direct Option<T> assignment (same type in both structs)
438        base.cache_type_k = self.cache_type_k;
439        base.cache_type_v = self.cache_type_v;
440        base.presence_penalty = self.presence_penalty;
441        base.frequency_penalty = self.frequency_penalty;
442        base.max_tokens = self.max_tokens;
443
444        // Special: max_concurrent_predictions uses or() for Option chaining
445        base.max_concurrent_predictions = self
446            .max_concurrent_predictions
447            .or(base.max_concurrent_predictions);
448
449        // Special: gpu_layers converts i32 legacy field to GpuLayersMode enum
450        // Only applies when gpu_layers is explicitly set in the override.
451        if let Some(n) = self.gpu_layers {
452            base.gpu_layers_mode = match n {
453                n if n < 0 => crate::models::GpuLayersMode::All,
454                n => crate::models::GpuLayersMode::Specific(n as u32),
455            };
456        }
457
458        // FIELD ACCOUNTING (ModelOverride: 92 fields):
459        // - apply_scalar: 55 fields
460        // - apply_clone: 7 fields
461        // - apply_option: 13 fields
462        // - direct Option assign: 5 fields (cache_type_k, cache_type_v, presence_penalty,
463        //   frequency_penalty, max_tokens)
464        // - special: 1 field (max_concurrent_predictions)
465        // - conditional: gpu_layers overrides gpu_layers_mode only when Some
466        // - NOT in ModelSettings: 0 (all ModelOverride fields mapped above)
467        //
468        // ModelSettings fields NOT in ModelOverride (not overridable):
469        // host, port, timeout, backend, platform, router_max_models, server_mode,
470        // api_endpoint_enabled, api_endpoint_port
471        //
472        // When adding a field: ensure it appears in exactly one category above.
473    }
474}
475
476/// Built-in profiles with sensible defaults for popular model families.
477pub fn builtin_profiles() -> Vec<Profile> {
478    vec![
479        Profile {
480            name: "Qwen".into(),
481            description: "Optimized for Qwen models (dense)".into(),
482            settings: ModelOverride {
483                context_length: Some(131072),
484                temperature: Some(0.7),
485                top_k: Some(20),
486                top_p: Some(0.95),
487                max_tokens: Some(4096),
488                presence_penalty: Some(0.0),
489                uniform_cache: Some(true),
490                jinja: Some(true),
491                ..Default::default()
492            },
493        },
494        Profile {
495            name: "Qwen-MoE".into(),
496            description: "Optimized for Qwen MoE models (35B-A3B)".into(),
497            settings: ModelOverride {
498                context_length: Some(131072),
499                temperature: Some(0.8),
500                top_k: Some(20),
501                top_p: Some(0.95),
502                max_tokens: Some(4096),
503                presence_penalty: Some(1.5),
504                uniform_cache: Some(true),
505                jinja: Some(true),
506                ..Default::default()
507            },
508        },
509        Profile {
510            name: "Qwen-Coding".into(),
511            description: "Optimized for Qwen models in coding mode".into(),
512            settings: ModelOverride {
513                context_length: Some(131072),
514                temperature: Some(0.6),
515                top_k: Some(20),
516                top_p: Some(0.95),
517                max_tokens: Some(4096),
518                presence_penalty: Some(0.0),
519                uniform_cache: Some(true),
520                jinja: Some(true),
521                ..Default::default()
522            },
523        },
524        Profile {
525            name: "Gemma".into(),
526            description: "Optimized for Gemma 2/4 models".into(),
527            settings: ModelOverride {
528                context_length: Some(131072),
529                min_p: Some(0.1),
530                temperature: Some(1.0),
531                top_k: Some(65),
532                top_p: Some(0.95),
533                max_tokens: Some(4096),
534                uniform_cache: Some(true),
535                jinja: Some(true),
536                ..Default::default()
537            },
538        },
539        Profile {
540            name: "Llama".into(),
541            description: "Optimized for Llama 3.1/3.3 models".into(),
542            settings: ModelOverride {
543                context_length: Some(131072),
544                temperature: Some(0.7),
545                top_p: Some(0.9),
546                repeat_penalty: Some(1.1),
547                max_tokens: Some(4096),
548                uniform_cache: Some(true),
549                jinja: Some(true),
550                ..Default::default()
551            },
552        },
553        Profile {
554            name: "Mistral".into(),
555            description: "Optimized for Mistral 7B/NeMo models".into(),
556            settings: ModelOverride {
557                context_length: Some(131072),
558                temperature: Some(0.7),
559                top_k: Some(50),
560                top_p: Some(0.9),
561                max_tokens: Some(4096),
562                uniform_cache: Some(true),
563                jinja: Some(true),
564                ..Default::default()
565            },
566        },
567        Profile {
568            name: "Phi".into(),
569            description: "Optimized for Phi 3.5 Mini models".into(),
570            settings: ModelOverride {
571                context_length: Some(131072),
572                temperature: Some(0.7),
573                top_k: Some(50),
574                top_p: Some(0.9),
575                repeat_penalty: Some(1.1),
576                max_tokens: Some(4096),
577                uniform_cache: Some(true),
578                ..Default::default()
579            },
580        },
581    ]
582}
583
584#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
585#[serde(default)]
586pub struct DefaultParams {
587    // Loading
588    #[serde(default)]
589    pub context_length: u32,
590    #[serde(default)]
591    pub threads: u32,
592    #[serde(default)]
593    pub threads_batch: u32,
594    #[serde(default)]
595    pub batch_size: u32,
596    #[serde(default)]
597    pub ubatch_size: u32,
598    #[serde(default = "default_cache_type_k")]
599    pub cache_type_k: Option<CacheTypeK>,
600    #[serde(default = "default_cache_type_v")]
601    pub cache_type_v: Option<CacheTypeV>,
602    #[serde(default)]
603    pub keep: i32,
604    #[serde(default)]
605    pub swa_full: bool,
606    #[serde(default)]
607    pub mlock: bool,
608    #[serde(default)]
609    pub mmap: bool,
610    #[serde(default)]
611    pub numa: NumMode,
612    #[serde(default)]
613    pub uniform_cache: bool,
614    #[serde(default)]
615    pub kv_cache_offload: bool,
616    #[serde(default)]
617    pub parallel: u32,
618    #[serde(default)]
619    pub max_concurrent_predictions: Option<u32>,
620    #[serde(default)]
621    pub system_prompt: String,
622    #[serde(default = "default_system_prompt_preset_name")]
623    pub system_prompt_preset_name: String,
624    // GPU
625    #[serde(default)]
626    pub gpu_layers: i32,
627    #[serde(default = "default_gpu_layers_mode")]
628    pub gpu_layers_mode: crate::models::GpuLayersMode,
629    #[serde(default)]
630    pub split_mode: SplitMode,
631    #[serde(default)]
632    pub tensor_split: String,
633    #[serde(default)]
634    pub main_gpu: i32,
635    #[serde(default)]
636    pub fit: bool,
637    #[serde(default)]
638    pub lora: Option<PathBuf>,
639    #[serde(default)]
640    pub lora_scaled: Option<(PathBuf, f32)>,
641    #[serde(default)]
642    pub rpc: String,
643    #[serde(default)]
644    pub embedding: bool,
645    #[serde(default)]
646    pub flash_attn: bool,
647    #[serde(default)]
648    pub jinja: bool,
649    #[serde(default)]
650    pub chat_template: Option<String>,
651    #[serde(default)]
652    pub chat_template_kwargs: Option<String>,
653    #[serde(default)]
654    pub expert_count: i32,
655
656    // Sampling
657    #[serde(default)]
658    pub seed: i32,
659    #[serde(default)]
660    pub temperature: f32,
661    #[serde(default)]
662    pub top_k: i32,
663    #[serde(default)]
664    pub top_p: f32,
665    #[serde(default)]
666    pub min_p: f32,
667    #[serde(default)]
668    pub typical_p: f32,
669    #[serde(default)]
670    pub mirostat: Mirostat,
671    #[serde(default)]
672    pub mirostat_lr: f32,
673    #[serde(default)]
674    pub mirostat_ent: f32,
675    #[serde(default)]
676    pub ignore_eos: bool,
677    #[serde(default)]
678    pub samplers: Samplers,
679
680    // Repetition
681    #[serde(default)]
682    pub repeat_penalty: f32,
683    #[serde(default)]
684    pub repeat_last_n: i32,
685    #[serde(default = "default_presence_penalty")]
686    pub presence_penalty: Option<f32>,
687    #[serde(default = "default_frequency_penalty")]
688    pub frequency_penalty: Option<f32>,
689    #[serde(default)]
690    pub dry_multiplier: f32,
691    #[serde(default)]
692    pub dry_base: f32,
693    #[serde(default)]
694    pub dry_allowed_length: i32,
695    #[serde(default)]
696    pub dry_penalty_last_n: i32,
697
698    // RoPE
699    #[serde(default)]
700    pub rope_scaling: RopeScaling,
701    #[serde(default)]
702    pub rope_scale: f32,
703    #[serde(default)]
704    pub rope_freq_base: f32,
705    #[serde(default)]
706    pub rope_freq_scale: f32,
707    #[serde(default)]
708    pub rope_yarn_enabled: bool,
709
710    // Server
711    #[serde(default)]
712    pub host: String,
713    #[serde(default)]
714    pub port: u16,
715    #[serde(default)]
716    pub timeout: u32,
717    #[serde(default = "default_cache_prompt")]
718    pub cache_prompt: bool,
719    #[serde(default)]
720    pub cache_reuse: u32,
721    #[serde(default)]
722    pub webui: bool,
723    #[serde(default)]
724    pub ws_server_enabled: bool,
725    #[serde(default = "default_ws_server_port")]
726    pub ws_server_port: u16,
727    #[serde(default)]
728    pub ws_server_auth_key: Option<String>,
729    #[serde(default)]
730    pub ws_server_tls_enabled: bool,
731    #[serde(default)]
732    pub ws_server_tls_cert: Option<String>,
733    #[serde(default)]
734    pub ws_server_tls_key: Option<String>,
735    #[serde(default)]
736    pub router_max_models: u32,
737    #[serde(default)]
738    pub server_mode: crate::models::ServerMode,
739
740    // Other
741    #[serde(default = "default_max_tokens")]
742    pub max_tokens: Option<u32>,
743    #[serde(default)]
744    pub cache_type: CacheType,
745    #[serde(default)]
746    pub backend: Backend,
747    /// Platform override: "linux", "windows", or "macos". If None, auto-detected.
748    #[serde(default)]
749    pub platform: Option<String>,
750    #[serde(default)]
751    pub llama_cpp_version_cpu: Option<String>,
752    #[serde(default)]
753    pub llama_cpp_version_vulkan: Option<String>,
754    #[serde(default)]
755    pub llama_cpp_version_rocm: Option<String>,
756    #[serde(default)]
757    pub llama_cpp_version_rocm_lemonade: Option<String>,
758    #[serde(default)]
759    pub llama_cpp_version_cuda: Option<String>,
760
761    // API
762    #[serde(default)]
763    pub api_endpoint_enabled: bool,
764    #[serde(default = "default_api_endpoint_port")]
765    pub api_endpoint_port: u16,
766    #[serde(default)]
767    pub spec_type: String,
768    #[serde(default)]
769    pub draft_tokens: u32,
770    #[serde(default)]
771    pub tags: Vec<String>,
772}
773
/// Serde default for `DefaultParams::api_endpoint_port`.
fn default_api_endpoint_port() -> u16 {
    const API_PORT: u16 = 49222;
    API_PORT
}
777
/// Serde default for `DefaultParams::system_prompt_preset_name`.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
781
782fn default_cache_type_k() -> Option<CacheTypeK> {
783    None
784}
785fn default_cache_type_v() -> Option<CacheTypeV> {
786    None
787}
/// Serde default for `DefaultParams::presence_penalty`: no value.
fn default_presence_penalty() -> Option<f32> {
    Option::None
}
/// Serde default for `DefaultParams::frequency_penalty`: no value.
fn default_frequency_penalty() -> Option<f32> {
    Option::None
}
/// Serde default for `DefaultParams::max_tokens`: no value.
fn default_max_tokens() -> Option<u32> {
    Option::None
}
/// Serde default for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    const CACHE_PROMPT: bool = true;
    CACHE_PROMPT
}
/// Serde default for `DefaultParams::ws_server_port`.
fn default_ws_server_port() -> u16 {
    const WS_SERVER_PORT: u16 = 49223;
    WS_SERVER_PORT
}
803fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
804    crate::models::GpuLayersMode::Auto
805}
806
807impl Default for DefaultParams {
808    fn default() -> Self {
809        Self {
810            // Loading
811            context_length: 131072,
812            threads: physical_cores(),
813            threads_batch: 8,
814            batch_size: 512,
815            ubatch_size: 512,
816            cache_type_k: None,
817            cache_type_v: None,
818            keep: 0,
819            swa_full: false,
820            mlock: false,
821            mmap: true,
822            numa: NumMode::None,
823            uniform_cache: true,
824            kv_cache_offload: true,
825            parallel: 1,
826            max_concurrent_predictions: None,
827            system_prompt: "You are a helpful assistant.".to_string(),
828            system_prompt_preset_name: "General".to_string(),
829
830            // GPU
831            gpu_layers: -1,
832            gpu_layers_mode: crate::models::GpuLayersMode::Auto,
833            split_mode: SplitMode::Layer,
834            tensor_split: String::new(),
835            main_gpu: 0,
836            fit: true,
837            lora: None,
838            lora_scaled: None,
839            rpc: String::new(),
840            embedding: false,
841            flash_attn: true,
842            jinja: true,
843            chat_template: None,
844            chat_template_kwargs: None,
845            expert_count: -1,
846
847            // Sampling
848            seed: -1,
849            temperature: 0.8,
850            top_k: 40,
851            top_p: 0.95,
852            min_p: 0.0,
853            typical_p: 1.0,
854            mirostat: Mirostat::Off,
855            mirostat_lr: 0.1,
856            mirostat_ent: 5.0,
857            ignore_eos: false,
858            samplers: Samplers::default(),
859
860            // Repetition
861            repeat_penalty: 1.1,
862            repeat_last_n: 64,
863            presence_penalty: None,
864            frequency_penalty: None,
865            dry_multiplier: 0.0,
866            dry_base: 1.75,
867            dry_allowed_length: 2,
868            dry_penalty_last_n: -1,
869
870            // RoPE
871            rope_scaling: RopeScaling::None,
872            rope_scale: 1.0,
873            rope_freq_base: 0.0,
874            rope_freq_scale: 1.0,
875            rope_yarn_enabled: false,
876
877            // Server
878            host: "127.0.0.1".to_string(),
879            port: 8080,
880            timeout: 600,
881            cache_prompt: true,
882            cache_reuse: 0,
883            webui: false,
884            ws_server_enabled: false,
885            ws_server_port: 49223,
886            ws_server_auth_key: None,
887            ws_server_tls_enabled: false,
888            ws_server_tls_cert: None,
889            ws_server_tls_key: None,
890            router_max_models: 4,
891            server_mode: crate::models::ServerMode::Normal,
892
893            // Other
894            max_tokens: None,
895            cache_type: CacheType::F16,
896            backend: {
897                use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
898                let vendors = detect_gpu_vendors();
899                let mut result = Backend::Cpu;
900                for v in &vendors {
901                    if matches!(v, GpuVendor::Nvidia) {
902                        result = Backend::Cuda;
903                        break;
904                    }
905                    if matches!(v, GpuVendor::Amd) {
906                        result = Backend::Rocm;
907                        break;
908                    }
909                    if matches!(v, GpuVendor::Intel) {
910                        result = Backend::Vulkan;
911                        break;
912                    }
913                }
914                result
915            },
916            platform: None,
917            llama_cpp_version_cpu: None,
918            llama_cpp_version_vulkan: None,
919            llama_cpp_version_rocm: None,
920            llama_cpp_version_rocm_lemonade: None,
921            llama_cpp_version_cuda: None,
922            api_endpoint_enabled: false,
923            api_endpoint_port: 49222,
924            spec_type: String::new(),
925            draft_tokens: 0,
926            tags: Vec::new(),
927        }
928    }
929}
930
931impl Default for Config {
932    fn default() -> Self {
933        Self {
934            models_dirs: vec![
935                dirs::data_dir()
936                    .unwrap_or_default()
937                    .join("llm-manager")
938                    .join("models"),
939            ],
940            llama_server: "llama-server".into(),
941            default: DefaultParams::default(),
942            model_overrides: Default::default(),
943            profiles: Default::default(),
944            system_prompt_presets: Default::default(),
945            rpc_workers: Vec::new(),
946            ws_server: WsServer {
947                enabled: false,
948                port: 49223,
949                auth_key: None,
950                host: "0.0.0.0".to_string(),
951                tls_enabled: false,
952                tls_cert: None,
953                tls_key: None,
954            },
955            search_limit: default_search_limit(),
956        }
957    }
958}
959
960impl Config {
961    pub fn config_path() -> PathBuf {
962        config_base_dir()
963            .join("llm-manager")
964            .join("config.yaml")
965    }
966
967    /// Validate config values and return a list of warnings for invalid entries.
968    pub fn validate(&self) -> Vec<String> {
969        let mut warnings = Vec::new();
970        let default = &self.default;
971
972        // Numeric range checks
973        if default.context_length < 512 || default.context_length > 131072 {
974            warnings.push(format!(
975                "context_length {} is outside recommended range 512-131072",
976                default.context_length
977            ));
978        }
979        if default.temperature < 0.0 || default.temperature > 2.0 {
980            warnings.push(format!(
981                "temperature {} is outside recommended range 0.0-2.0",
982                default.temperature
983            ));
984        }
985        if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
986            warnings.push(format!(
987                "top_p {} is outside recommended range 0.0-1.0",
988                default.top_p
989            ));
990        }
991        if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
992            && default.repeat_penalty != 1.0
993        {
994            warnings.push(format!(
995                "repeat_penalty {} is outside recommended range 0.0-3.0",
996                default.repeat_penalty
997            ));
998        }
999        if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
1000            warnings.push(format!(
1001                "mirostat_lr {} is outside recommended range 0.0-1.0",
1002                default.mirostat_lr
1003            ));
1004        }
1005        if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
1006            warnings.push(format!(
1007                "mirostat_ent {} is outside recommended range 0.0-10.0",
1008                default.mirostat_ent
1009            ));
1010        }
1011
1012        if default.timeout < 1 {
1013            warnings.push(format!(
1014                "timeout {} must be at least 1 second",
1015                default.timeout
1016            ));
1017        }
1018
1019        // Path validation
1020        if let Some(lora) = &default.lora
1021            && !lora.exists() {
1022                warnings.push(format!("lora path {} does not exist", lora.display()));
1023            }
1024        if let Some((lora, _)) = &default.lora_scaled
1025            && !lora.exists() {
1026                warnings.push(format!("lora path {} does not exist", lora.display()));
1027            }
1028
1029        // Model override validation
1030        for model_name in self.model_overrides.keys() {
1031            if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
1032                if let Some(lora) = &override_settings.lora
1033                    && !lora.exists() {
1034                        warnings.push(format!(
1035                            "model '{}' lora path {} does not exist",
1036                            model_name,
1037                            lora.display()
1038                        ));
1039                    }
1040                if let Some((lora, _)) = &override_settings.lora_scaled
1041                    && !lora.exists() {
1042                        warnings.push(format!(
1043                            "model '{}' lora path {} does not exist",
1044                            model_name,
1045                            lora.display()
1046                        ));
1047                    }
1048            }
1049        }
1050
1051        warnings
1052    }
1053
1054    /// Resolve settings for a specific model and profile.
1055    pub fn resolve_settings(
1056        &self,
1057        model_name: Option<&str>,
1058        profile_name: Option<&str>,
1059    ) -> crate::models::ModelSettings {
1060        let mut settings = crate::models::ModelSettings::from_config(self);
1061
1062        // Apply model-specific override
1063        if let Some(name) = model_name
1064            && let Some(override_settings) = self.model_overrides.get(name)
1065        {
1066            override_settings.apply(&mut settings);
1067        }
1068
1069        // Apply profile override if specified
1070        if let Some(p_name) = profile_name {
1071            if let Some(profile) = self.profiles.get(p_name) {
1072                profile.settings.apply(&mut settings);
1073            } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1074                profile.settings.apply(&mut settings);
1075            }
1076        }
1077
1078        settings
1079    }
1080
1081    /// Get a system prompt preset content by name.
1082    pub fn get_preset_content(&self, name: &str) -> Option<String> {
1083        self.system_prompt_presets
1084            .get(name)
1085            .map(|p| p.content.clone())
1086    }
1087
1088    fn normalize_config(mut config: Config) -> Config {
1089        // normalize models_dirs
1090        for path in &mut config.models_dirs {
1091            let path_str = path.to_string_lossy();
1092            if let Some(stripped) = path_str.strip_prefix("~/") {
1093                let home = dirs::home_dir().unwrap_or_default();
1094                *path = home.join(stripped);
1095            } else if !path.is_absolute() {
1096                let home = dirs::home_dir().unwrap_or_default();
1097                *path = home.join(path_str.as_ref());
1098            }
1099        }
1100
1101        // Merge built-in profiles into in-memory cache (do not persist to disk)
1102        for p in builtin_profiles() {
1103            if config.profiles.get(&p.name).is_none() {
1104                config.profiles.insert_builtin(p);
1105            }
1106        }
1107
1108        // Merge built-in system prompt presets into in-memory cache (do not persist to disk)
1109        for p in builtin_system_prompt_presets() {
1110            if config.system_prompt_presets.get(&p.name).is_none() {
1111                config.system_prompt_presets.insert_builtin(p);
1112            }
1113        }
1114        config
1115    }
1116
1117    fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1118        let content = std::fs::read_to_string(path)?;
1119        let config: Config = serde_yaml::from_str(&content)
1120            .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1121        let config = Self::normalize_config(config);
1122        let config = config.auto_detect_platform();
1123        let warnings = config.validate();
1124        if !warnings.is_empty() {
1125            eprintln!("Config validation warnings:");
1126            for warning in &warnings {
1127                eprintln!("  - {}", warning);
1128            }
1129        }
1130        Ok(config)
1131    }
1132
1133    pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1134        let path = Self::config_path();
1135        if path.exists() {
1136            Self::load_impl(&path)
1137        } else {
1138            let mut config = Config::default();
1139            config.save()?;
1140            Ok(config)
1141        }
1142    }
1143
1144    pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1145        if path.exists() {
1146            Self::load_impl(&path)
1147        } else {
1148            Err(format!("Config file not found: {}", path.display()).into())
1149        }
1150    }
1151
1152    /// Auto-detect the platform if not explicitly set in config.
1153    fn auto_detect_platform(mut self) -> Self {
1154        if self.default.platform.is_none() {
1155            self.default.platform =
1156                Some(
1157                    crate::backend::hardware::platform_name(
1158                        crate::backend::hardware::detect_platform(),
1159                    )
1160                    .to_string(),
1161                );
1162        }
1163        self
1164    }
1165
1166    pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1167        let path = Self::config_path();
1168        if let Some(parent) = path.parent() {
1169            std::fs::create_dir_all(parent)?;
1170        }
1171        let content = serde_yaml::to_string(self)?;
1172        std::fs::write(&path, content)?;
1173        // Persist model configs to individual YAML files
1174        let entries: Vec<(String, ModelOverride)> = self
1175            .model_overrides
1176            .keys()
1177            .iter()
1178            .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1179            .collect();
1180        for (name, cfg) in entries {
1181            self.model_overrides.save(&name, &cfg);
1182        }
1183        // Persist user profiles to individual YAML files (skip built-ins)
1184        for profile in self.profiles.user_profiles() {
1185            self.profiles.save(&profile);
1186        }
1187        // Persist user presets to individual YAML files (skip built-ins)
1188        for preset in self.system_prompt_presets.user_presets() {
1189            self.system_prompt_presets.save(&preset);
1190        }
1191        Ok(())
1192    }
1193
1194    pub fn merged_profiles(&self) -> Vec<Profile> {
1195        self.profiles.all()
1196    }
1197
1198    pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
1199        self.system_prompt_presets.all()
1200    }
1201}
1202
/// Severity attached to a log entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    Info,
    Warning,
    Error,
}

impl LogLevel {
    /// Upper-case text label for this level.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Info => "INFO",
            Self::Warning => "WARNING",
            Self::Error => "ERROR",
        }
    }
}
1219
1220#[derive(Debug, Clone)]
1221pub struct LogEntry {
1222    pub timestamp: String,
1223    pub level: LogLevel,
1224    pub message: String,
1225}
1226
1227impl LogEntry {
1228    pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1229        let timestamp = Local::now().format("%H:%M:%S").to_string();
1230        let message = sanitize_log(&message.into());
1231        Self {
1232            timestamp,
1233            level,
1234            message,
1235        }
1236    }
1237}
1238
/// Sanitize log messages to prevent TUI layout breakages.
///
/// Only the first 2000 characters of `input` are examined; anything beyond
/// that is never materialized, so huge lines stay cheap. Within that window:
///
/// - ANSI CSI escape sequences (`ESC [ … final-byte`, e.g. colour codes) are
///   removed as a whole, so no `[31m`-style residue is left behind;
/// - every other control character except newline and tab is dropped
///   (notably `\r`, which breaks TUI rendering);
/// - tabs are expanded to four spaces for consistent rendering.
///
/// Trailing whitespace is trimmed, and `"... (truncated)"` is appended when the
/// input was longer than the window.
fn sanitize_log(input: &str) -> String {
    // Limit length to avoid layout/perf issues with massive lines.
    const MAX_CHARS: usize = 2000;
    const TRUNCATION_MARKER: &str = "... (truncated)";

    let mut remaining = input.chars();
    let mut output = String::with_capacity(input.len().min(MAX_CHARS));

    {
        // `take` never pulls more than MAX_CHARS items from `remaining`, so
        // whatever is left afterwards is exactly the part that was cut off.
        let mut window = remaining.by_ref().take(MAX_CHARS).peekable();
        while let Some(c) = window.next() {
            match c {
                '\x1b' if window.peek() == Some(&'[') => {
                    window.next(); // the '[' introducer
                    // Parameter/intermediate bytes are 0x20..=0x3F; one final
                    // byte in 0x40..=0x7E ends the sequence. Stop on anything
                    // else so a malformed sequence cannot swallow real text.
                    while let Some(&next) = window.peek() {
                        if ('\x20'..='\x3f').contains(&next) {
                            window.next();
                        } else {
                            if ('\x40'..='\x7e').contains(&next) {
                                window.next();
                            }
                            break;
                        }
                    }
                }
                '\t' => output.push_str("    "),
                '\n' => output.push('\n'),
                c if c.is_control() => {}
                c => output.push(c),
            }
        }
    }

    let truncated = remaining.next().is_some();

    // Final trim to remove trailing junk.
    let trimmed_len = output.trim_end().len();
    output.truncate(trimmed_len);
    if truncated {
        output.push_str(TRUNCATION_MARKER);
    }
    output
}