llm_manager/
config.rs

1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12pub use model_config::ModelConfigStore;
13
14pub use profiles::ProfileStore;
15
16use crate::models::{
17    Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
18};
19pub use presets::PresetStore;
20
21/// Resolve the base config directory with a safe fallback chain.
22///
23/// Prefers `dirs::config_dir()` (XDG on Linux, ~/Library/Application Support on macOS,
24/// etc.), falls back to `~/.config`, and lastly `./.llm-manager` if both fail.
25pub fn config_base_dir() -> PathBuf {
26    if let Some(d) = dirs::config_dir() {
27        return d;
28    }
29    if let Some(home) = dirs::home_dir() {
30        return home.join(".config");
31    }
32    PathBuf::from(".").join(".llm-manager")
33}
34
/// Count physical CPU cores (logical CPUs sharing a core are counted once).
///
/// Reads `/proc/cpuinfo` and counts the distinct (`physical id`, `core id`)
/// pairs it lists. If the file cannot be read, or it contains no such pair,
/// the result falls back to `std::thread::available_parallelism()` and, when
/// that is unavailable too, to 1. The returned value is therefore never 0.
pub fn physical_cores() -> u32 {
    let counted = std::fs::read_to_string("/proc/cpuinfo")
        .map(|content| count_core_pairs(&content))
        .unwrap_or(0);
    if counted > 0 {
        return counted as u32;
    }
    std::thread::available_parallelism()
        .map(|p| p.get() as u32)
        .unwrap_or(1)
}

/// Count distinct (`physical id`, `core id`) pairs in `/proc/cpuinfo`-style text.
fn count_core_pairs(content: &str) -> usize {
    let mut seen = HashSet::new();
    let mut phys: Option<&str> = None;
    let mut core: Option<&str> = None;

    // The extra empty line makes sure the final stanza is flushed as well.
    for line in content.lines().chain(std::iter::once("")) {
        let field = line.split_once(':').map(|(k, v)| (k.trim(), v.trim()));

        // A stanza ends at a blank line or when the next `processor` entry begins.
        // Recording the pair only here (and clearing both halves) prevents a new
        // stanza's `physical id` from being paired with the previous stanza's
        // stale `core id`.
        let stanza_boundary =
            line.trim().is_empty() || matches!(field, Some(("processor", _)));
        if stanza_boundary {
            if let (Some(p), Some(c)) = (phys.take(), core.take()) {
                seen.insert((p, c));
            }
            continue;
        }

        match field {
            Some(("physical id", value)) => phys = Some(value),
            Some(("core id", value)) => core = Some(value),
            _ => {}
        }
    }
    seen.len()
}
65
/// A remote RPC worker for distributed inference.
///
/// When deserializing, only `ip` is mandatory; every other field has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RpcWorker {
    /// Selection flag; `false` when the key is absent.
    #[serde(default)]
    pub selected: bool,
    /// Worker name; empty string when the key is absent.
    #[serde(default)]
    pub name: String,
    /// Worker address (no serde default, so the key must be present).
    pub ip: String,
    /// Worker port; falls back to `default_rpc_port()` when the key is absent.
    #[serde(default = "default_rpc_port")]
    pub port: u16,
}
77
/// Serde fallback for `RpcWorker::port`.
fn default_rpc_port() -> u16 {
    const RPC_PORT: u16 = 50052;
    RPC_PORT
}
81
/// Global configuration.
///
/// `models_dirs`, `llama_server` and `default` carry no serde default and must be
/// present when deserializing. The three store fields are marked `skip`, so they are
/// neither written nor read by serde and start out as their `Default` value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Directories that hold model files.
    pub models_dirs: Vec<PathBuf>,
    /// Path (or bare executable name) of the `llama-server` binary.
    pub llama_server: PathBuf,
    /// Default parameter set.
    pub default: DefaultParams,
    /// Per-model overrides (keyed by model file name, stored as YAML in models/).
    #[serde(default, skip)]
    pub model_overrides: ModelConfigStore,
    /// Named profiles of settings presets (stored as YAML in profiles/).
    #[serde(default, skip)]
    pub profiles: ProfileStore,
    /// System prompt presets (stored as YAML in presets/).
    #[serde(default, skip)]
    pub system_prompt_presets: PresetStore,
    /// RPC Workers for distributed inference (empty when the key is absent).
    #[serde(default)]
    pub rpc_workers: Vec<RpcWorker>,
    /// Number of results per HuggingFace search query
    /// (`default_search_limit()` when the key is absent).
    #[serde(default = "default_search_limit")]
    pub search_limit: u32,
}
104
/// Serde fallback for `Config::search_limit`.
fn default_search_limit() -> u32 {
    const SEARCH_LIMIT: u32 = 50;
    SEARCH_LIMIT
}
108
/// A named profile of settings.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Profile {
    /// Profile name.
    pub name: String,
    /// Brief description shown in the profile list.
    pub description: String,
    /// The settings for this profile; an all-`None` override when the key is absent.
    #[serde(default)]
    pub settings: ModelOverride,
}
119
120impl Profile {
121    /// Apply this profile's settings to a base ModelSettings.
122    pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
123        self.settings.apply(&mut base);
124        base
125    }
126}
127
/// A named system prompt preset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemPromptPreset {
    /// Preset name (e.g. "General").
    pub name: String,
    /// Short human-readable summary of the preset.
    pub description: String,
    /// The system prompt text itself.
    pub content: String,
}
135
136/// Built-in system prompt presets.
137pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
138    vec![
139        SystemPromptPreset {
140            name: "General".into(),
141            description: "General-purpose assistant".into(),
142            content: "You are a helpful assistant.".into(),
143        },
144        SystemPromptPreset {
145            name: "Coder".into(),
146            description: "Expert software developer".into(),
147            content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
148        },
149        SystemPromptPreset {
150            name: "Thinker".into(),
151            description: "Analytical and thoughtful".into(),
152            content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
153        },
154        SystemPromptPreset {
155            name: "Mathematician".into(),
156            description: "Expert in mathematics".into(),
157            content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
158        },
159    ]
160}
161
/// A sparse set of model settings: every field is an `Option`, and the derived
/// `Default` leaves all of them `None`.
///
/// `ModelOverride::apply` merges the fields into a `ModelSettings`, and
/// `ModelOverride::from_settings` builds one from existing settings.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct ModelOverride {
    // Loading
    pub context_length: Option<u32>,
    pub batch_size: Option<u32>,
    pub ubatch_size: Option<u32>,
    pub cache_type_k: Option<CacheTypeK>,
    pub cache_type_v: Option<CacheTypeV>,
    pub keep: Option<i32>,
    pub swa_full: Option<bool>,
    pub mlock: Option<bool>,
    pub mmap: Option<bool>,
    pub numa: Option<NumMode>,
    pub uniform_cache: Option<bool>,
    pub system_prompt: Option<String>,
    pub system_prompt_preset_name: Option<String>,
    pub max_concurrent_predictions: Option<u32>,
    pub threads: Option<u32>,
    pub threads_batch: Option<u32>,
    pub parallel: Option<u32>,

    // GPU
    // Legacy numeric form of `gpu_layers_mode`; `apply` maps negative values to
    // `GpuLayersMode::All` and anything else to `GpuLayersMode::Specific`.
    pub gpu_layers: Option<i32>,
    pub split_mode: Option<SplitMode>,
    pub tensor_split: Option<String>,
    pub main_gpu: Option<i32>,
    pub fit: Option<bool>,
    pub lora: Option<PathBuf>,
    // LoRA path paired with an f32 (presumably the scale — confirm against the consumer).
    pub lora_scaled: Option<(PathBuf, f32)>,
    pub rpc: Option<String>,
    pub embedding: Option<bool>,
    pub kv_cache_offload: Option<bool>,
    pub flash_attn: Option<bool>,
    pub jinja: Option<bool>,
    pub chat_template: Option<String>,
    pub chat_template_kwargs: Option<String>,
    pub expert_count: Option<i32>,
    pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,

    // Sampling
    pub seed: Option<i32>,
    pub temperature: Option<f32>,
    pub top_k: Option<i32>,
    pub top_p: Option<f32>,
    pub min_p: Option<f32>,
    pub typical_p: Option<f32>,
    pub mirostat: Option<Mirostat>,
    pub mirostat_lr: Option<f32>,
    pub mirostat_ent: Option<f32>,
    pub ignore_eos: Option<bool>,
    pub samplers: Option<Samplers>,

    // Repetition
    pub repeat_penalty: Option<f32>,
    pub repeat_last_n: Option<i32>,
    pub presence_penalty: Option<f32>,
    pub frequency_penalty: Option<f32>,
    pub dry_multiplier: Option<f32>,
    pub dry_base: Option<f32>,
    pub dry_allowed_length: Option<i32>,
    pub dry_penalty_last_n: Option<i32>,

    // RoPE
    pub rope_scaling: Option<RopeScaling>,
    pub rope_scale: Option<f32>,
    pub rope_freq_base: Option<f32>,
    pub rope_freq_scale: Option<f32>,
    pub rope_yarn_enabled: Option<bool>,

    // Server
    pub cache_prompt: Option<bool>,
    pub cache_reuse: Option<u32>,
    pub webui: Option<bool>,

    // Other
    pub max_tokens: Option<u32>,
    pub cache_type: Option<CacheType>,
    // One pinned llama.cpp version string per backend flavour.
    pub llama_cpp_version_cpu: Option<String>,
    pub llama_cpp_version_vulkan: Option<String>,
    pub llama_cpp_version_rocm: Option<String>,
    pub llama_cpp_version_rocm_lemonade: Option<String>,
    pub llama_cpp_version_cuda: Option<String>,
    pub spec_type: Option<String>,
    pub draft_tokens: Option<u32>,
    pub tags: Option<Vec<String>>,
}
248
/// Merge `Copy` fields: for each listed field, `base.f` takes the override's value when
/// the override holds `Some`, and is left untouched when it holds `None`.
macro_rules! apply_scalar {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field {
                $base.$field = value;
            }
        )+
    };
}
257
/// Merge `Clone` fields whose base type is `T` (not `Option<T>`): when the override holds
/// `Some(v)`, `base.f` becomes a clone of `v`; a `None` override leaves `base.f` alone.
macro_rules! apply_clone {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field.as_ref() {
                $base.$field = value.clone();
            }
        )+
    };
}
268
/// Merge fields that are `Option<T>` on both sides: a `Some` override replaces `base.f`
/// with a clone of itself; a `None` override leaves `base.f` as it was.
macro_rules! apply_option {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if $self.$field.is_some() {
                $base.$field = $self.$field.clone();
            }
        )+
    };
}
279
280impl ModelOverride {
281    pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
282        Self {
283            context_length: Some(s.context_length),
284            batch_size: Some(s.batch_size),
285            ubatch_size: Some(s.ubatch_size),
286            cache_type_k: s.cache_type_k,
287            cache_type_v: s.cache_type_v,
288            keep: Some(s.keep),
289            swa_full: Some(s.swa_full),
290            mlock: Some(s.mlock),
291            mmap: Some(s.mmap),
292            numa: Some(s.numa),
293            uniform_cache: Some(s.uniform_cache),
294            system_prompt: Some(s.system_prompt.clone()),
295            system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
296            max_concurrent_predictions: s.max_concurrent_predictions,
297            threads: Some(s.threads),
298            threads_batch: Some(s.threads_batch),
299            parallel: Some(s.parallel),
300            gpu_layers: Some(match s.gpu_layers_mode {
301                crate::models::GpuLayersMode::Auto => 0,
302                crate::models::GpuLayersMode::Specific(n) => n as i32,
303                crate::models::GpuLayersMode::All => -1,
304            }),
305            gpu_layers_mode: Some(s.gpu_layers_mode),
306            split_mode: Some(s.split_mode),
307            tensor_split: Some(s.tensor_split.clone()),
308            main_gpu: Some(s.main_gpu),
309            fit: Some(s.fit),
310            lora: s.lora.clone(),
311            lora_scaled: s.lora_scaled.clone(),
312            rpc: Some(s.rpc.clone()),
313            embedding: Some(s.embedding),
314            kv_cache_offload: Some(s.kv_cache_offload),
315            flash_attn: Some(s.flash_attn),
316            jinja: Some(s.jinja),
317            chat_template: s.chat_template.clone(),
318            chat_template_kwargs: s.chat_template_kwargs.clone(),
319            expert_count: Some(s.expert_count),
320            seed: Some(s.seed),
321            temperature: Some(s.temperature),
322            top_k: Some(s.top_k),
323            top_p: Some(s.top_p),
324            min_p: Some(s.min_p),
325            typical_p: Some(s.typical_p),
326            mirostat: Some(s.mirostat),
327            mirostat_lr: Some(s.mirostat_lr),
328            mirostat_ent: Some(s.mirostat_ent),
329            ignore_eos: Some(s.ignore_eos),
330            samplers: Some(s.samplers.clone()),
331            repeat_penalty: Some(s.repeat_penalty),
332            repeat_last_n: Some(s.repeat_last_n),
333            presence_penalty: s.presence_penalty,
334            frequency_penalty: s.frequency_penalty,
335            dry_multiplier: Some(s.dry_multiplier),
336            dry_base: Some(s.dry_base),
337            dry_allowed_length: Some(s.dry_allowed_length),
338            dry_penalty_last_n: Some(s.dry_penalty_last_n),
339            rope_scaling: Some(s.rope_scaling),
340            rope_scale: Some(s.rope_scale),
341            rope_freq_base: Some(s.rope_freq_base),
342            rope_freq_scale: Some(s.rope_freq_scale),
343            rope_yarn_enabled: Some(s.rope_yarn_enabled),
344            cache_prompt: Some(s.cache_prompt),
345            cache_reuse: Some(s.cache_reuse),
346            webui: Some(s.webui),
347            max_tokens: s.max_tokens,
348            cache_type: Some(s.cache_type),
349            llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
350            llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
351            llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
352            llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
353            llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
354            spec_type: Some(s.spec_type.clone()),
355            draft_tokens: Some(s.draft_tokens),
356           tags: Some(s.tags.clone()),
357        }
358    }
359
360    /// Merge override into a base ModelSettings (in-place).
361    pub fn apply(&self, base: &mut crate::models::ModelSettings) {
362        // Override values always take precedence. For Option<T> fields,
363        // the override value (even None) is explicitly set by the user.
364
365        // Scalar Copy fields: base.f = self.f.unwrap_or(base.f)
366        apply_scalar!(self, base,
367            context_length, batch_size, ubatch_size, keep, swa_full, mlock, mmap,
368            numa, uniform_cache, kv_cache_offload, threads, threads_batch, parallel,
369            split_mode, main_gpu, fit, embedding, flash_attn, jinja, expert_count,
370            seed, temperature, top_k, top_p, min_p, typical_p,
371            mirostat, mirostat_lr, mirostat_ent, ignore_eos,
372            repeat_penalty, repeat_last_n,
373            dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
374            rope_scaling, rope_scale, rope_freq_base, rope_freq_scale, rope_yarn_enabled,
375            cache_prompt, cache_reuse, webui, cache_type,
376            draft_tokens, gpu_layers_mode,
377        );
378
379        // Cloneable fields: if let Some(v) = &self.f { base.f = v.clone(); }
380        apply_clone!(self, base,
381            system_prompt, system_prompt_preset_name, tensor_split, rpc,
382            samplers, spec_type, tags,
383        );
384
385        // Option<T> fields: if let Some(v) = &self.f { base.f = Some(v.clone()); }
386        apply_option!(self, base,
387            lora, lora_scaled, chat_template, chat_template_kwargs,
388            llama_cpp_version_cpu, llama_cpp_version_vulkan,
389            llama_cpp_version_rocm, llama_cpp_version_rocm_lemonade,
390            llama_cpp_version_cuda,
391        );
392
393        // Direct Option<T> assignment (same type in both structs)
394        base.cache_type_k = self.cache_type_k;
395        base.cache_type_v = self.cache_type_v;
396        base.presence_penalty = self.presence_penalty;
397        base.frequency_penalty = self.frequency_penalty;
398        base.max_tokens = self.max_tokens;
399
400        // Special: max_concurrent_predictions uses or() for Option chaining
401        base.max_concurrent_predictions = self
402            .max_concurrent_predictions
403            .or(base.max_concurrent_predictions);
404
405        // Special: gpu_layers converts i32 legacy field to GpuLayersMode enum
406        // Only applies when gpu_layers is explicitly set in the override.
407        if let Some(n) = self.gpu_layers {
408            base.gpu_layers_mode = match n {
409                n if n < 0 => crate::models::GpuLayersMode::All,
410                n => crate::models::GpuLayersMode::Specific(n as u32),
411            };
412        }
413
414        // FIELD ACCOUNTING (ModelOverride: 87 fields):
415        // - apply_scalar: 53 fields
416        // - apply_clone: 7 fields
417        // - apply_option: 10 fields
418        // - direct Option assign: 5 fields (cache_type_k, cache_type_v, presence_penalty,
419        //   frequency_penalty, max_tokens)
420        // - special: 1 field (max_concurrent_predictions)
421        // - conditional: gpu_layers overrides gpu_layers_mode only when Some
422        // - NOT in ModelSettings: 0 (all ModelOverride fields mapped above)
423        //
424        // ModelSettings fields NOT in ModelOverride (not overridable):
425        // host, port, timeout, backend, platform, router_max_models, server_mode,
426        // api_endpoint_enabled, api_endpoint_port
427        //
428        // When adding a field: ensure it appears in exactly one category above.
429    }
430}
431
432/// Built-in profiles with sensible defaults for popular model families.
433pub fn builtin_profiles() -> Vec<Profile> {
434    vec![
435        Profile {
436            name: "Qwen".into(),
437            description: "Optimized for Qwen models (dense)".into(),
438            settings: ModelOverride {
439                context_length: Some(131072),
440                temperature: Some(0.7),
441                top_k: Some(20),
442                top_p: Some(0.95),
443                max_tokens: Some(4096),
444                presence_penalty: Some(0.0),
445                uniform_cache: Some(true),
446                jinja: Some(true),
447                ..Default::default()
448            },
449        },
450        Profile {
451            name: "Qwen-MoE".into(),
452            description: "Optimized for Qwen MoE models (35B-A3B)".into(),
453            settings: ModelOverride {
454                context_length: Some(131072),
455                temperature: Some(0.8),
456                top_k: Some(20),
457                top_p: Some(0.95),
458                max_tokens: Some(4096),
459                presence_penalty: Some(1.5),
460                uniform_cache: Some(true),
461                jinja: Some(true),
462                ..Default::default()
463            },
464        },
465        Profile {
466            name: "Qwen-Coding".into(),
467            description: "Optimized for Qwen models in coding mode".into(),
468            settings: ModelOverride {
469                context_length: Some(131072),
470                temperature: Some(0.6),
471                top_k: Some(20),
472                top_p: Some(0.95),
473                max_tokens: Some(4096),
474                presence_penalty: Some(0.0),
475                uniform_cache: Some(true),
476                jinja: Some(true),
477                ..Default::default()
478            },
479        },
480        Profile {
481            name: "Gemma".into(),
482            description: "Optimized for Gemma 2/4 models".into(),
483            settings: ModelOverride {
484                context_length: Some(131072),
485                min_p: Some(0.1),
486                temperature: Some(1.0),
487                top_k: Some(65),
488                top_p: Some(0.95),
489                max_tokens: Some(4096),
490                uniform_cache: Some(true),
491                jinja: Some(true),
492                ..Default::default()
493            },
494        },
495        Profile {
496            name: "Llama".into(),
497            description: "Optimized for Llama 3.1/3.3 models".into(),
498            settings: ModelOverride {
499                context_length: Some(131072),
500                temperature: Some(0.7),
501                top_p: Some(0.9),
502                repeat_penalty: Some(1.1),
503                max_tokens: Some(4096),
504                uniform_cache: Some(true),
505                jinja: Some(true),
506                ..Default::default()
507            },
508        },
509        Profile {
510            name: "Mistral".into(),
511            description: "Optimized for Mistral 7B/NeMo models".into(),
512            settings: ModelOverride {
513                context_length: Some(131072),
514                temperature: Some(0.7),
515                top_k: Some(50),
516                top_p: Some(0.9),
517                max_tokens: Some(4096),
518                uniform_cache: Some(true),
519                jinja: Some(true),
520                ..Default::default()
521            },
522        },
523        Profile {
524            name: "Phi".into(),
525            description: "Optimized for Phi 3.5 Mini models".into(),
526            settings: ModelOverride {
527                context_length: Some(131072),
528                temperature: Some(0.7),
529                top_k: Some(50),
530                top_p: Some(0.9),
531                repeat_penalty: Some(1.1),
532                max_tokens: Some(4096),
533                uniform_cache: Some(true),
534                ..Default::default()
535            },
536        },
537    ]
538}
539
540#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
541#[serde(default)]
542pub struct DefaultParams {
543    // Loading
544    #[serde(default)]
545    pub context_length: u32,
546    #[serde(default)]
547    pub threads: u32,
548    #[serde(default)]
549    pub threads_batch: u32,
550    #[serde(default)]
551    pub batch_size: u32,
552    #[serde(default)]
553    pub ubatch_size: u32,
554    #[serde(default = "default_cache_type_k")]
555    pub cache_type_k: Option<CacheTypeK>,
556    #[serde(default = "default_cache_type_v")]
557    pub cache_type_v: Option<CacheTypeV>,
558    #[serde(default)]
559    pub keep: i32,
560    #[serde(default)]
561    pub swa_full: bool,
562    #[serde(default)]
563    pub mlock: bool,
564    #[serde(default)]
565    pub mmap: bool,
566    #[serde(default)]
567    pub numa: NumMode,
568    #[serde(default)]
569    pub uniform_cache: bool,
570    #[serde(default)]
571    pub kv_cache_offload: bool,
572    #[serde(default)]
573    pub parallel: u32,
574    #[serde(default)]
575    pub max_concurrent_predictions: Option<u32>,
576    #[serde(default)]
577    pub system_prompt: String,
578    #[serde(default = "default_system_prompt_preset_name")]
579    pub system_prompt_preset_name: String,
580    // GPU
581    #[serde(default)]
582    pub gpu_layers: i32,
583    #[serde(default = "default_gpu_layers_mode")]
584    pub gpu_layers_mode: crate::models::GpuLayersMode,
585    #[serde(default)]
586    pub split_mode: SplitMode,
587    #[serde(default)]
588    pub tensor_split: String,
589    #[serde(default)]
590    pub main_gpu: i32,
591    #[serde(default)]
592    pub fit: bool,
593    #[serde(default)]
594    pub lora: Option<PathBuf>,
595    #[serde(default)]
596    pub lora_scaled: Option<(PathBuf, f32)>,
597    #[serde(default)]
598    pub rpc: String,
599    #[serde(default)]
600    pub embedding: bool,
601    #[serde(default)]
602    pub flash_attn: bool,
603    #[serde(default)]
604    pub jinja: bool,
605    #[serde(default)]
606    pub chat_template: Option<String>,
607    #[serde(default)]
608    pub chat_template_kwargs: Option<String>,
609    #[serde(default)]
610    pub expert_count: i32,
611
612    // Sampling
613    #[serde(default)]
614    pub seed: i32,
615    #[serde(default)]
616    pub temperature: f32,
617    #[serde(default)]
618    pub top_k: i32,
619    #[serde(default)]
620    pub top_p: f32,
621    #[serde(default)]
622    pub min_p: f32,
623    #[serde(default)]
624    pub typical_p: f32,
625    #[serde(default)]
626    pub mirostat: Mirostat,
627    #[serde(default)]
628    pub mirostat_lr: f32,
629    #[serde(default)]
630    pub mirostat_ent: f32,
631    #[serde(default)]
632    pub ignore_eos: bool,
633    #[serde(default)]
634    pub samplers: Samplers,
635
636    // Repetition
637    #[serde(default)]
638    pub repeat_penalty: f32,
639    #[serde(default)]
640    pub repeat_last_n: i32,
641    #[serde(default = "default_presence_penalty")]
642    pub presence_penalty: Option<f32>,
643    #[serde(default = "default_frequency_penalty")]
644    pub frequency_penalty: Option<f32>,
645    #[serde(default)]
646    pub dry_multiplier: f32,
647    #[serde(default)]
648    pub dry_base: f32,
649    #[serde(default)]
650    pub dry_allowed_length: i32,
651    #[serde(default)]
652    pub dry_penalty_last_n: i32,
653
654    // RoPE
655    #[serde(default)]
656    pub rope_scaling: RopeScaling,
657    #[serde(default)]
658    pub rope_scale: f32,
659    #[serde(default)]
660    pub rope_freq_base: f32,
661    #[serde(default)]
662    pub rope_freq_scale: f32,
663    #[serde(default)]
664    pub rope_yarn_enabled: bool,
665
666    // Server
667    #[serde(default)]
668    pub host: String,
669    #[serde(default)]
670    pub port: u16,
671    #[serde(default)]
672    pub timeout: u32,
673    #[serde(default = "default_cache_prompt")]
674    pub cache_prompt: bool,
675    #[serde(default)]
676    pub cache_reuse: u32,
677    #[serde(default)]
678    pub webui: bool,
679    #[serde(default)]
680    pub ws_server_enabled: bool,
681    #[serde(default = "default_ws_server_port")]
682    pub ws_server_port: u16,
683    #[serde(default)]
684    pub ws_server_auth_key: Option<String>,
685    #[serde(default = "default_ws_server_tls_enabled")]
686    pub ws_server_tls_enabled: bool,
687    #[serde(default)]
688    pub ws_server_tls_cert: Option<String>,
689    #[serde(default)]
690    pub ws_server_tls_key: Option<String>,
691    #[serde(default)]
692    pub router_max_models: u32,
693    #[serde(default)]
694    pub server_mode: crate::models::ServerMode,
695
696    // Other
697    #[serde(default = "default_max_tokens")]
698    pub max_tokens: Option<u32>,
699    #[serde(default)]
700    pub cache_type: CacheType,
701    #[serde(default)]
702    pub backend: Backend,
703    /// Platform override: "linux", "windows", or "macos". If None, auto-detected.
704    #[serde(default)]
705    pub platform: Option<String>,
706    #[serde(default)]
707    pub llama_cpp_version_cpu: Option<String>,
708    #[serde(default)]
709    pub llama_cpp_version_vulkan: Option<String>,
710    #[serde(default)]
711    pub llama_cpp_version_rocm: Option<String>,
712    #[serde(default)]
713    pub llama_cpp_version_rocm_lemonade: Option<String>,
714    #[serde(default)]
715    pub llama_cpp_version_cuda: Option<String>,
716
717    // API
718    #[serde(default)]
719    pub api_endpoint_enabled: bool,
720    #[serde(default = "default_api_endpoint_port")]
721    pub api_endpoint_port: u16,
722    #[serde(default)]
723    pub spec_type: String,
724    #[serde(default)]
725    pub draft_tokens: u32,
726    #[serde(default)]
727    pub tags: Vec<String>,
728}
729
/// Serde fallback for `DefaultParams::api_endpoint_port`.
fn default_api_endpoint_port() -> u16 {
    const API_ENDPOINT_PORT: u16 = 49222;
    API_ENDPOINT_PORT
}
733
/// Serde fallback for `DefaultParams::system_prompt_preset_name`.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
737
738fn default_cache_type_k() -> Option<CacheTypeK> {
739    None
740}
741fn default_cache_type_v() -> Option<CacheTypeV> {
742    None
743}
/// Serde fallback for `DefaultParams::presence_penalty`: unset.
fn default_presence_penalty() -> Option<f32> {
    const UNSET: Option<f32> = None;
    UNSET
}
/// Serde fallback for `DefaultParams::frequency_penalty`: unset.
fn default_frequency_penalty() -> Option<f32> {
    const UNSET: Option<f32> = None;
    UNSET
}
/// Serde fallback for `DefaultParams::max_tokens`: no limit configured.
fn default_max_tokens() -> Option<u32> {
    const UNSET: Option<u32> = None;
    UNSET
}
/// Serde fallback for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
756fn default_ws_server_port() -> u16 {
757     49223
758 }
759 fn default_ws_server_tls_enabled() -> bool {
760     true
761 }
762 fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
763    crate::models::GpuLayersMode::Auto
764}
765
766impl Default for DefaultParams {
767    fn default() -> Self {
768        Self {
769            // Loading
770            context_length: 131072,
771            threads: physical_cores(),
772            threads_batch: 8,
773            batch_size: 512,
774            ubatch_size: 512,
775            cache_type_k: None,
776            cache_type_v: None,
777            keep: 0,
778            swa_full: false,
779            mlock: false,
780            mmap: true,
781            numa: NumMode::None,
782            uniform_cache: true,
783            kv_cache_offload: true,
784            parallel: 1,
785            max_concurrent_predictions: None,
786            system_prompt: "You are a helpful assistant.".to_string(),
787            system_prompt_preset_name: "General".to_string(),
788
789            // GPU
790            gpu_layers: -1,
791            gpu_layers_mode: crate::models::GpuLayersMode::Auto,
792            split_mode: SplitMode::Layer,
793            tensor_split: String::new(),
794            main_gpu: 0,
795            fit: true,
796            lora: None,
797            lora_scaled: None,
798            rpc: String::new(),
799            embedding: false,
800            flash_attn: true,
801            jinja: true,
802            chat_template: None,
803            chat_template_kwargs: None,
804            expert_count: -1,
805
806            // Sampling
807            seed: -1,
808            temperature: 0.8,
809            top_k: 40,
810            top_p: 0.95,
811            min_p: 0.0,
812            typical_p: 1.0,
813            mirostat: Mirostat::Off,
814            mirostat_lr: 0.1,
815            mirostat_ent: 5.0,
816            ignore_eos: false,
817            samplers: Samplers::default(),
818
819            // Repetition
820            repeat_penalty: 1.1,
821            repeat_last_n: 64,
822            presence_penalty: None,
823            frequency_penalty: None,
824            dry_multiplier: 0.0,
825            dry_base: 1.75,
826            dry_allowed_length: 2,
827            dry_penalty_last_n: -1,
828
829            // RoPE
830            rope_scaling: RopeScaling::None,
831            rope_scale: 1.0,
832            rope_freq_base: 0.0,
833            rope_freq_scale: 1.0,
834            rope_yarn_enabled: false,
835
836            // Server
837            host: "127.0.0.1".to_string(),
838            port: 8080,
839            timeout: 600,
840            cache_prompt: true,
841            cache_reuse: 0,
842            webui: false,
843            ws_server_enabled: false,
844            ws_server_port: 49223,
845            ws_server_auth_key: None,
846            ws_server_tls_enabled: true,
847            ws_server_tls_cert: None,
848            ws_server_tls_key: None,
849            router_max_models: 4,
850            server_mode: crate::models::ServerMode::Normal,
851
852            // Other
853            max_tokens: None,
854            cache_type: CacheType::F16,
855            backend: {
856                use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
857                let vendors = detect_gpu_vendors();
858                let mut result = Backend::Cpu;
859                for v in &vendors {
860                    if matches!(v, GpuVendor::Nvidia) {
861                        result = Backend::Cuda;
862                        break;
863                    }
864                    if matches!(v, GpuVendor::Amd) {
865                        result = Backend::Rocm;
866                        break;
867                    }
868                    if matches!(v, GpuVendor::Intel) {
869                        result = Backend::Vulkan;
870                        break;
871                    }
872                }
873                result
874            },
875            platform: None,
876            llama_cpp_version_cpu: None,
877            llama_cpp_version_vulkan: None,
878            llama_cpp_version_rocm: None,
879            llama_cpp_version_rocm_lemonade: None,
880            llama_cpp_version_cuda: None,
881            api_endpoint_enabled: false,
882            api_endpoint_port: 49222,
883            spec_type: String::new(),
884            draft_tokens: 0,
885            tags: Vec::new(),
886        }
887    }
888}
889
890impl Default for Config {
891    fn default() -> Self {
892        Self {
893            models_dirs: vec![
894                dirs::data_dir()
895                    .unwrap_or_default()
896                    .join("llm-manager")
897                    .join("models"),
898            ],
899            llama_server: "llama-server".into(),
900            default: DefaultParams::default(),
901            model_overrides: Default::default(),
902            profiles: Default::default(),
903            system_prompt_presets: Default::default(),
904            rpc_workers: Vec::new(),
905            search_limit: default_search_limit(),
906        }
907    }
908}
909
910impl Config {
911    pub fn config_path() -> PathBuf {
912        config_base_dir()
913            .join("llm-manager")
914            .join("config.yaml")
915    }
916
917    /// Validate config values and return a list of warnings for invalid entries.
918    pub fn validate(&self) -> Vec<String> {
919        let mut warnings = Vec::new();
920        let default = &self.default;
921
922        // Numeric range checks
923        if default.context_length < 512 || default.context_length > 131072 {
924            warnings.push(format!(
925                "context_length {} is outside recommended range 512-131072",
926                default.context_length
927            ));
928        }
929        if default.temperature < 0.0 || default.temperature > 2.0 {
930            warnings.push(format!(
931                "temperature {} is outside recommended range 0.0-2.0",
932                default.temperature
933            ));
934        }
935        if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
936            warnings.push(format!(
937                "top_p {} is outside recommended range 0.0-1.0",
938                default.top_p
939            ));
940        }
941        if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
942            && default.repeat_penalty != 1.0
943        {
944            warnings.push(format!(
945                "repeat_penalty {} is outside recommended range 0.0-3.0",
946                default.repeat_penalty
947            ));
948        }
949        if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
950            warnings.push(format!(
951                "mirostat_lr {} is outside recommended range 0.0-1.0",
952                default.mirostat_lr
953            ));
954        }
955        if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
956            warnings.push(format!(
957                "mirostat_ent {} is outside recommended range 0.0-10.0",
958                default.mirostat_ent
959            ));
960        }
961
962        if default.timeout < 1 {
963            warnings.push(format!(
964                "timeout {} must be at least 1 second",
965                default.timeout
966            ));
967        }
968
969        // Path validation
970        if let Some(lora) = &default.lora
971            && !lora.exists() {
972                warnings.push(format!("lora path {} does not exist", lora.display()));
973            }
974        if let Some((lora, _)) = &default.lora_scaled
975            && !lora.exists() {
976                warnings.push(format!("lora path {} does not exist", lora.display()));
977            }
978
979        // Model override validation
980        for model_name in self.model_overrides.keys() {
981            if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
982                if let Some(lora) = &override_settings.lora
983                    && !lora.exists() {
984                        warnings.push(format!(
985                            "model '{}' lora path {} does not exist",
986                            model_name,
987                            lora.display()
988                        ));
989                    }
990                if let Some((lora, _)) = &override_settings.lora_scaled
991                    && !lora.exists() {
992                        warnings.push(format!(
993                            "model '{}' lora path {} does not exist",
994                            model_name,
995                            lora.display()
996                        ));
997                    }
998            }
999        }
1000
1001        warnings
1002    }
1003
1004    /// Resolve settings for a specific model and profile.
1005    pub fn resolve_settings(
1006        &self,
1007        model_name: Option<&str>,
1008        profile_name: Option<&str>,
1009    ) -> crate::models::ModelSettings {
1010        let mut settings = crate::models::ModelSettings::from_config(self);
1011
1012        // Apply model-specific override
1013        if let Some(name) = model_name
1014            && let Some(override_settings) = self.model_overrides.get(name)
1015        {
1016            override_settings.apply(&mut settings);
1017        }
1018
1019        // Apply profile override if specified
1020        if let Some(p_name) = profile_name {
1021            if let Some(profile) = self.profiles.get(p_name) {
1022                profile.settings.apply(&mut settings);
1023            } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1024                profile.settings.apply(&mut settings);
1025            }
1026        }
1027
1028        settings
1029    }
1030
1031    /// Get a system prompt preset content by name.
1032    pub fn get_preset_content(&self, name: &str) -> Option<String> {
1033        self.system_prompt_presets
1034            .get(name)
1035            .map(|p| p.content.clone())
1036    }
1037
1038    fn normalize_config(mut config: Config) -> Config {
1039        // normalize models_dirs
1040        for path in &mut config.models_dirs {
1041            let path_str = path.to_string_lossy();
1042            if let Some(stripped) = path_str.strip_prefix("~/") {
1043                let home = dirs::home_dir().unwrap_or_default();
1044                *path = home.join(stripped);
1045            } else if !path.is_absolute() {
1046                let home = dirs::home_dir().unwrap_or_default();
1047                *path = home.join(path_str.as_ref());
1048            }
1049        }
1050
1051        // Merge built-in profiles into in-memory cache (do not persist to disk)
1052        for p in builtin_profiles() {
1053            if config.profiles.get(&p.name).is_none() {
1054                config.profiles.insert_builtin(p);
1055            }
1056        }
1057
1058        // Merge built-in system prompt presets into in-memory cache (do not persist to disk)
1059        for p in builtin_system_prompt_presets() {
1060            if config.system_prompt_presets.get(&p.name).is_none() {
1061                config.system_prompt_presets.insert_builtin(p);
1062            }
1063        }
1064        config
1065    }
1066
1067    fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1068        let content = std::fs::read_to_string(path)?;
1069        let config: Config = serde_yaml::from_str(&content)
1070            .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1071        let config = Self::normalize_config(config);
1072        let config = config.auto_detect_platform();
1073        let warnings = config.validate();
1074        if !warnings.is_empty() {
1075            eprintln!("Config validation warnings:");
1076            for warning in &warnings {
1077                eprintln!("  - {}", warning);
1078            }
1079        }
1080        Ok(config)
1081    }
1082
1083    pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1084        let path = Self::config_path();
1085        if path.exists() {
1086            Self::load_impl(&path)
1087        } else {
1088            let mut config = Config::default();
1089            config.save()?;
1090            Ok(config)
1091        }
1092    }
1093
1094    pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1095        if path.exists() {
1096            Self::load_impl(&path)
1097        } else {
1098            Err(format!("Config file not found: {}", path.display()).into())
1099        }
1100    }
1101
1102    /// Auto-detect the platform if not explicitly set in config.
1103    fn auto_detect_platform(mut self) -> Self {
1104        if self.default.platform.is_none() {
1105            self.default.platform =
1106                Some(
1107                    crate::backend::hardware::platform_name(
1108                        crate::backend::hardware::detect_platform(),
1109                    )
1110                    .to_string(),
1111                );
1112        }
1113        self
1114    }
1115
1116    pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1117        let path = Self::config_path();
1118        if let Some(parent) = path.parent() {
1119            std::fs::create_dir_all(parent)?;
1120        }
1121        let content = serde_yaml::to_string(self)?;
1122        std::fs::write(&path, content)?;
1123        // Persist model configs to individual YAML files
1124        let entries: Vec<(String, ModelOverride)> = self
1125            .model_overrides
1126            .keys()
1127            .iter()
1128            .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1129            .collect();
1130        for (name, cfg) in entries {
1131            self.model_overrides.save(&name, &cfg);
1132        }
1133        // Persist user profiles to individual YAML files (skip built-ins)
1134        for profile in self.profiles.user_profiles() {
1135            self.profiles.save(&profile);
1136        }
1137        // Persist user presets to individual YAML files (skip built-ins)
1138        for preset in self.system_prompt_presets.user_presets() {
1139            self.system_prompt_presets.save(&preset);
1140        }
1141        Ok(())
1142    }
1143
1144    pub fn merged_profiles(&self) -> Vec<Profile> {
1145        self.profiles.all()
1146    }
1147
1148    pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
1149        self.system_prompt_presets.all()
1150    }
1151}
1152
/// Severity attached to a log entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    Info,
    Warning,
    Error,
}

impl LogLevel {
    /// Upper-case text label for this level (`"INFO"`, `"WARNING"`, `"ERROR"`).
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Info => "INFO",
            Self::Warning => "WARNING",
            Self::Error => "ERROR",
        }
    }
}
1169
1170#[derive(Debug, Clone)]
1171pub struct LogEntry {
1172    pub timestamp: String,
1173    pub level: LogLevel,
1174    pub message: String,
1175}
1176
1177impl LogEntry {
1178    pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1179        let timestamp = Local::now().format("%H:%M:%S").to_string();
1180        let message = sanitize_log(&message.into());
1181        Self {
1182            timestamp,
1183            level,
1184            message,
1185        }
1186    }
1187}
1188
/// Sanitize log messages to prevent TUI layout breakages.
///
/// Only the first 2000 characters of `input` are considered. Within that
/// window:
/// * ANSI CSI escape sequences (`ESC [ ... final-byte`, e.g. colour codes
///   such as `ESC[31m`) are removed entirely, so no `[31m` residue is left;
/// * every other control character except `\n` is dropped, notably `\r`,
///   which breaks TUI rendering;
/// * each tab is expanded to four spaces for consistent rendering.
///
/// Trailing whitespace is trimmed, and if the input was longer than the
/// window the marker `... (truncated)` is appended.
fn sanitize_log(input: &str) -> String {
    // Limit length to avoid layout/perf issues with massive lines
    const MAX_CHARS: usize = 2000;
    const ESC: char = '\u{1b}';

    let mut remaining = input.chars();
    let mut output = String::with_capacity(input.len().min(MAX_CHARS));
    {
        // `take` never pulls more than MAX_CHARS items from `remaining`, so
        // whatever is left afterwards tells us whether the input was cut.
        let mut window = remaining.by_ref().take(MAX_CHARS).peekable();
        while let Some(c) = window.next() {
            match c {
                ESC if window.peek() == Some(&'[') => {
                    // CSI: '[' + parameter/intermediate bytes (0x20-0x3F) + one final byte (0x40-0x7E)
                    window.next();
                    while window
                        .next_if(|p| ('\u{20}'..='\u{3f}').contains(p))
                        .is_some()
                    {}
                    let _ = window.next_if(|p| ('\u{40}'..='\u{7e}').contains(p));
                }
                '\n' => output.push('\n'),
                '\t' => output.push_str("    "),
                other if other.is_control() => {}
                other => output.push(other),
            }
        }
    }
    let truncated = remaining.next().is_some();

    // Final trim to remove trailing junk (done in place, no extra allocation)
    let kept = output.trim_end().len();
    output.truncate(kept);
    if truncated {
        output.push_str("... (truncated)");
    }
    output
}
llm_manager/config.rs

llm_manager/
config.rs