llm_manager/
config.rs

1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12#[allow(unused_imports)]
13pub use model_config::{display_from_key, key_from_display, ModelConfigStore};
14
15pub use profiles::ProfileStore;
16
17use crate::models::{
18    Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
19};
20use crate::tui::app::ActivePanel;
21pub use presets::PresetStore;
22
/// Default system prompt used when no preset is selected.
///
/// `DefaultParams::default()` uses this text as its initial `system_prompt`.
pub const DEFAULT_SYSTEM_PROMPT: &str = "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.";
25
26/// Resolve the base config directory with a safe fallback chain.
27///
28/// Prefers `dirs::config_dir()` (XDG on Linux, ~/Library/Application Support on macOS,
29/// etc.), falls back to `~/.config`, and lastly `./.llm-manager` if both fail.
30pub fn config_base_dir() -> PathBuf {
31    if let Some(d) = dirs::config_dir() {
32        return d;
33    }
34    if let Some(home) = dirs::home_dir() {
35        return home.join(".config");
36    }
37    PathBuf::from(".").join(".llm-manager")
38}
39
/// Count physical CPU cores on Linux (hyperthreading siblings are counted once).
///
/// Parses `/proc/cpuinfo` and counts the distinct `(physical id, core id)` pairs found in
/// its processor stanzas.
///
/// Falls back to [`std::thread::available_parallelism`] (the logical CPU count), and
/// finally to 1, when the file cannot be read **or** when it contains no such pair
/// (cpuinfo layouts without `physical id` / `core id` keys). The result is therefore
/// never 0, so it is safe to use directly as a thread count.
pub fn physical_cores() -> u32 {
    // Distinct `(physical id, core id)` pairs in `cpuinfo`; `None` when there are none.
    fn distinct_cores(cpuinfo: &str) -> Option<u32> {
        let mut seen: HashSet<(&str, &str)> = HashSet::new();
        let mut cur_phys: Option<&str> = None;
        let mut cur_core: Option<&str> = None;

        for line in cpuinfo.lines() {
            if line.trim().is_empty() {
                // A blank line ends one processor stanza; forget its ids so they cannot
                // be paired with ids belonging to the next processor.
                cur_phys = None;
                cur_core = None;
                continue;
            }
            if let Some((key, val)) = line.split_once(':') {
                match key.trim() {
                    "physical id" => cur_phys = Some(val.trim()),
                    "core id" => cur_core = Some(val.trim()),
                    // Any other key cannot complete a pair; skip the insert below.
                    _ => continue,
                }
                if let (Some(phys), Some(core)) = (cur_phys, cur_core) {
                    seen.insert((phys, core));
                }
            }
        }

        // Treat "no topology information" as unknown rather than as zero cores.
        u32::try_from(seen.len()).ok().filter(|&n| n > 0)
    }

    std::fs::read_to_string("/proc/cpuinfo")
        .ok()
        .and_then(|cpuinfo| distinct_cores(&cpuinfo))
        .unwrap_or_else(|| {
            std::thread::available_parallelism()
                .map(|p| p.get() as u32)
                .unwrap_or(1)
        })
}
70
/// A remote RPC worker for distributed inference.
///
/// When deserializing, only `ip` is required; every other field has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RpcWorker {
    /// Selection flag; `false` when absent from the serialized data.
    // NOTE(review): presumably marks the workers to use for a launch — confirm against callers.
    #[serde(default)]
    pub selected: bool,
    /// Worker name; empty when absent from the serialized data.
    #[serde(default)]
    pub name: String,
    /// Worker address (required).
    pub ip: String,
    /// Worker port; `default_rpc_port()` when absent from the serialized data.
    #[serde(default = "default_rpc_port")]
    pub port: u16,
}
82
/// Serde fallback for `RpcWorker::port` when the field is missing.
fn default_rpc_port() -> u16 {
    const RPC_PORT: u16 = 50052;
    RPC_PORT
}
86
87/// Global configuration.
88#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct Config {
90    pub models_dirs: Vec<PathBuf>,
91    pub llama_server: PathBuf,
92    pub default: DefaultParams,
93    /// Per-model overrides (keyed by display_name/path relative to model dir, stored as YAML in models/).
94    #[serde(default, skip)]
95    pub model_overrides: ModelConfigStore,
96    /// Named profiles of settings presets (stored as YAML in profiles/).
97    #[serde(default, skip)]
98    pub profiles: ProfileStore,
99    /// System prompt presets (stored as YAML in presets/).
100    #[serde(default, skip)]
101    pub system_prompt_presets: PresetStore,
102    /// RPC Workers for distributed inference.
103    #[serde(default)]
104    pub rpc_workers: Vec<RpcWorker>,
105    /// Number of results per HuggingFace search query.
106    #[serde(default = "default_search_limit")]
107    pub search_limit: u32,
108    /// The last focused panel position (for restoring on next launch).
109    #[serde(default)]
110    pub active_panel: crate::tui::app::ActivePanel,
111    /// Left panel width percentage (20-80).
112    #[serde(default = "default_left_pct")]
113    pub left_pct: u16,
114    /// UI language (en, fr, it). Falls back to en.
115    #[serde(default = "default_language")]
116    pub language: String,
117}
118
/// Serde fallback for `Config::language` when the field is missing.
fn default_language() -> String {
    String::from("en")
}
122
/// Serde fallback for `Config::left_pct` when the field is missing.
fn default_left_pct() -> u16 {
    const LEFT_PCT: u16 = 55;
    LEFT_PCT
}
126
/// Serde fallback for `Config::search_limit` when the field is missing.
fn default_search_limit() -> u32 {
    const SEARCH_LIMIT: u32 = 50;
    SEARCH_LIMIT
}
130
/// A named profile of settings.
///
/// A profile is a named, described `ModelOverride`; `Profile::apply` merges it into a
/// `ModelSettings` value.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Profile {
    /// Profile name (required when deserializing).
    pub name: String,
    /// Brief description shown in the profile list.
    pub description: String,
    /// The settings for this profile; an all-`None` override when absent from the data.
    #[serde(default)]
    pub settings: ModelOverride,
}
141
142impl Profile {
143    /// Apply this profile's settings to a base ModelSettings.
144    pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
145        self.settings.apply(&mut base);
146        base
147    }
148}
149
/// A named system prompt preset.
///
/// All three fields are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemPromptPreset {
    /// Preset name (e.g. "General", "Coder" for the built-in presets).
    pub name: String,
    /// Short human-readable description of the preset.
    pub description: String,
    /// The system prompt text itself.
    pub content: String,
}
157
158/// Built-in system prompt presets.
159pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
160    vec![
161        SystemPromptPreset {
162            name: "General".into(),
163            description: "General-purpose assistant".into(),
164            content: "You are a helpful assistant.".into(),
165        },
166        SystemPromptPreset {
167            name: "Coder".into(),
168            description: "Expert software developer".into(),
169            content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
170        },
171        SystemPromptPreset {
172            name: "Thinker".into(),
173            description: "Analytical and thoughtful".into(),
174            content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
175        },
176        SystemPromptPreset {
177            name: "Mathematician".into(),
178            description: "Expert in mathematics".into(),
179            content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
180        },
181    ]
182}
183
/// A sparse set of setting overrides that can be layered onto a `ModelSettings`.
///
/// Every field is an `Option`; the derived `Default` yields an override with all fields
/// `None`. `ModelOverride::from_settings` builds an override from a full settings value
/// and `ModelOverride::apply` merges an override into one.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct ModelOverride {
    // Loading
    pub context_length: Option<u32>,
    pub batch_size: Option<u32>,
    pub ubatch_size: Option<u32>,
    pub cache_type_k: Option<CacheTypeK>,
    pub cache_type_v: Option<CacheTypeV>,
    pub keep: Option<i32>,
    pub swa_full: Option<bool>,
    pub mlock: Option<bool>,
    pub mmap: Option<bool>,
    pub numa: Option<NumMode>,
    pub uniform_cache: Option<bool>,
    pub system_prompt: Option<String>,
    pub system_prompt_preset_name: Option<String>,
    pub max_concurrent_predictions: Option<u32>,
    pub threads: Option<u32>,
    pub threads_batch: Option<u32>,
    pub parallel: Option<u32>,

    // GPU
    /// Legacy numeric counterpart of `gpu_layers_mode`; `apply` converts it to a
    /// `GpuLayersMode` (negative values mean "all layers").
    pub gpu_layers: Option<i32>,
    pub split_mode: Option<SplitMode>,
    pub tensor_split: Option<String>,
    pub main_gpu: Option<i32>,
    pub fit: Option<bool>,
    pub lora: Option<PathBuf>,
    /// LoRA adapter path paired with an `f32`.
    // NOTE(review): the f32 is presumably the adapter scale — confirm against the launcher.
    pub lora_scaled: Option<(PathBuf, f32)>,
    pub rpc: Option<String>,
    pub embedding: Option<bool>,
    pub kv_cache_offload: Option<bool>,
    pub flash_attn: Option<bool>,
    pub jinja: Option<bool>,
    pub chat_template: Option<String>,
    pub chat_template_kwargs: Option<String>,
    pub expert_count: Option<i32>,
    pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,

    // Sampling
    pub seed: Option<i32>,
    pub temperature: Option<f32>,
    pub top_k: Option<i32>,
    pub top_p: Option<f32>,
    pub min_p: Option<f32>,
    pub typical_p: Option<f32>,
    pub mirostat: Option<Mirostat>,
    pub mirostat_lr: Option<f32>,
    pub mirostat_ent: Option<f32>,
    pub ignore_eos: Option<bool>,
    pub samplers: Option<Samplers>,

    // Repetition
    pub repeat_penalty: Option<f32>,
    pub repeat_last_n: Option<i32>,
    pub presence_penalty: Option<f32>,
    pub frequency_penalty: Option<f32>,
    pub dry_multiplier: Option<f32>,
    pub dry_base: Option<f32>,
    pub dry_allowed_length: Option<i32>,
    pub dry_penalty_last_n: Option<i32>,

    // RoPE
    pub rope_scaling: Option<RopeScaling>,
    pub rope_scale: Option<f32>,
    pub rope_freq_base: Option<f32>,
    pub rope_freq_scale: Option<f32>,
    pub rope_yarn_enabled: Option<bool>,

    // Server
    pub cache_prompt: Option<bool>,
    pub cache_reuse: Option<u32>,
    pub webui: Option<bool>,

    // Other
    pub max_tokens: Option<u32>,
    pub cache_type: Option<CacheType>,
    pub llama_cpp_version_cpu: Option<String>,
    pub llama_cpp_version_vulkan: Option<String>,
    pub llama_cpp_version_rocm: Option<String>,
    pub llama_cpp_version_rocm_lemonade: Option<String>,
    pub llama_cpp_version_cuda: Option<String>,
    pub spec_type: Option<String>,
    pub draft_tokens: Option<u32>,
    pub tags: Option<Vec<String>>,
}
270
/// Copy each listed `Copy` field from the override into the base when it is set.
///
/// For every `field`: if `self.field` is `Some(v)`, `base.field` becomes `v`; otherwise
/// `base.field` keeps its current value.
macro_rules! apply_scalar {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field {
                $base.$field = value;
            }
        )+
    };
}
279
/// Clone each listed field from the override into the base when it is set.
///
/// For every `field`: if `self.field` is `Some(v)`, `base.field` becomes a clone of `v`;
/// otherwise `base.field` is left untouched. The base field has the inner type `T`, not
/// `Option<T>`.
macro_rules! apply_clone {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field.as_ref() {
                $base.$field = value.clone();
            }
        )+
    };
}
290
/// Clone each listed optional field from the override into the base when it is set.
///
/// Both sides are `Option<T>`. For every `field`: if `self.field` is `Some`, `base.field`
/// becomes a clone of it; a `None` in the override leaves `base.field` as it was.
macro_rules! apply_option {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if $self.$field.is_some() {
                $base.$field = $self.$field.clone();
            }
        )+
    };
}
301
302impl ModelOverride {
303    pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
304        Self {
305            context_length: Some(s.context_length),
306            batch_size: Some(s.batch_size),
307            ubatch_size: Some(s.ubatch_size),
308            cache_type_k: s.cache_type_k,
309            cache_type_v: s.cache_type_v,
310            keep: Some(s.keep),
311            swa_full: Some(s.swa_full),
312            mlock: Some(s.mlock),
313            mmap: Some(s.mmap),
314            numa: Some(s.numa),
315            uniform_cache: Some(s.uniform_cache),
316            system_prompt: Some(s.system_prompt.clone()),
317            system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
318            max_concurrent_predictions: s.max_concurrent_predictions,
319            threads: Some(s.threads),
320            threads_batch: Some(s.threads_batch),
321            parallel: Some(s.parallel),
322            gpu_layers: Some(match s.gpu_layers_mode {
323                crate::models::GpuLayersMode::Auto => 0,
324                crate::models::GpuLayersMode::Specific(n) => n as i32,
325                crate::models::GpuLayersMode::All => -1,
326            }),
327            gpu_layers_mode: Some(s.gpu_layers_mode),
328            split_mode: Some(s.split_mode),
329            tensor_split: Some(s.tensor_split.clone()),
330            main_gpu: Some(s.main_gpu),
331            fit: Some(s.fit),
332            lora: s.lora.clone(),
333            lora_scaled: s.lora_scaled.clone(),
334            rpc: Some(s.rpc.clone()),
335            embedding: Some(s.embedding),
336            kv_cache_offload: Some(s.kv_cache_offload),
337            flash_attn: Some(s.flash_attn),
338            jinja: Some(s.jinja),
339            chat_template: s.chat_template.clone(),
340            chat_template_kwargs: s.chat_template_kwargs.clone(),
341            expert_count: Some(s.expert_count),
342            seed: Some(s.seed),
343            temperature: Some(s.temperature),
344            top_k: Some(s.top_k),
345            top_p: Some(s.top_p),
346            min_p: Some(s.min_p),
347            typical_p: Some(s.typical_p),
348            mirostat: Some(s.mirostat),
349            mirostat_lr: Some(s.mirostat_lr),
350            mirostat_ent: Some(s.mirostat_ent),
351            ignore_eos: Some(s.ignore_eos),
352            samplers: Some(s.samplers.clone()),
353            repeat_penalty: Some(s.repeat_penalty),
354            repeat_last_n: Some(s.repeat_last_n),
355            presence_penalty: s.presence_penalty,
356            frequency_penalty: s.frequency_penalty,
357            dry_multiplier: Some(s.dry_multiplier),
358            dry_base: Some(s.dry_base),
359            dry_allowed_length: Some(s.dry_allowed_length),
360            dry_penalty_last_n: Some(s.dry_penalty_last_n),
361            rope_scaling: Some(s.rope_scaling),
362            rope_scale: Some(s.rope_scale),
363            rope_freq_base: Some(s.rope_freq_base),
364            rope_freq_scale: Some(s.rope_freq_scale),
365            rope_yarn_enabled: Some(s.rope_yarn_enabled),
366            cache_prompt: Some(s.cache_prompt),
367            cache_reuse: Some(s.cache_reuse),
368            webui: Some(s.webui),
369            max_tokens: s.max_tokens,
370            cache_type: Some(s.cache_type),
371            llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
372            llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
373            llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
374            llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
375            llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
376            spec_type: Some(s.spec_type.clone()),
377            draft_tokens: Some(s.draft_tokens),
378            tags: Some(s.tags.clone()),
379        }
380    }
381
382    /// Merge override into a base ModelSettings (in-place).
383    pub fn apply(&self, base: &mut crate::models::ModelSettings) {
384        // Override values always take precedence. For Option<T> fields,
385        // the override value (even None) is explicitly set by the user.
386
387        // Scalar Copy fields: base.f = self.f.unwrap_or(base.f)
388        apply_scalar!(
389            self,
390            base,
391            context_length,
392            batch_size,
393            ubatch_size,
394            keep,
395            swa_full,
396            mlock,
397            mmap,
398            numa,
399            uniform_cache,
400            kv_cache_offload,
401            threads,
402            threads_batch,
403            parallel,
404            split_mode,
405            main_gpu,
406            fit,
407            embedding,
408            flash_attn,
409            jinja,
410            expert_count,
411            seed,
412            temperature,
413            top_k,
414            top_p,
415            min_p,
416            typical_p,
417            mirostat,
418            mirostat_lr,
419            mirostat_ent,
420            ignore_eos,
421            repeat_penalty,
422            repeat_last_n,
423            dry_multiplier,
424            dry_base,
425            dry_allowed_length,
426            dry_penalty_last_n,
427            rope_scaling,
428            rope_scale,
429            rope_freq_base,
430            rope_freq_scale,
431            rope_yarn_enabled,
432            cache_prompt,
433            cache_reuse,
434            webui,
435            cache_type,
436            draft_tokens,
437            gpu_layers_mode,
438        );
439
440        // Cloneable fields: if let Some(v) = &self.f { base.f = v.clone(); }
441        apply_clone!(
442            self,
443            base,
444            system_prompt,
445            system_prompt_preset_name,
446            tensor_split,
447            rpc,
448            samplers,
449            spec_type,
450            tags,
451        );
452
453        // Option<T> fields: if let Some(v) = &self.f { base.f = Some(v.clone()); }
454        apply_option!(
455            self,
456            base,
457            lora,
458            lora_scaled,
459            chat_template,
460            chat_template_kwargs,
461            llama_cpp_version_cpu,
462            llama_cpp_version_vulkan,
463            llama_cpp_version_rocm,
464            llama_cpp_version_rocm_lemonade,
465            llama_cpp_version_cuda,
466        );
467
468        // Direct Option<T> assignment (same type in both structs)
469        base.cache_type_k = self.cache_type_k;
470        base.cache_type_v = self.cache_type_v;
471        base.presence_penalty = self.presence_penalty;
472        base.frequency_penalty = self.frequency_penalty;
473        base.max_tokens = self.max_tokens;
474
475        // Special: max_concurrent_predictions uses or() for Option chaining
476        base.max_concurrent_predictions = self
477            .max_concurrent_predictions
478            .or(base.max_concurrent_predictions);
479
480        // Special: gpu_layers converts i32 legacy field to GpuLayersMode enum
481        // Only applies when gpu_layers is explicitly set in the override.
482        if let Some(n) = self.gpu_layers {
483            base.gpu_layers_mode = match n {
484                n if n < 0 => crate::models::GpuLayersMode::All,
485                n => crate::models::GpuLayersMode::Specific(n as u32),
486            };
487        }
488
489        // FIELD ACCOUNTING (ModelOverride: 87 fields):
490        // - apply_scalar: 53 fields
491        // - apply_clone: 7 fields
492        // - apply_option: 10 fields
493        // - direct Option assign: 5 fields (cache_type_k, cache_type_v, presence_penalty,
494        //   frequency_penalty, max_tokens)
495        // - special: 1 field (max_concurrent_predictions)
496        // - conditional: gpu_layers overrides gpu_layers_mode only when Some
497        // - NOT in ModelSettings: 0 (all ModelOverride fields mapped above)
498        //
499        // ModelSettings fields NOT in ModelOverride (not overridable):
500        // host, port, timeout, backend, platform, router_max_models, server_mode,
501        // api_endpoint_enabled, api_endpoint_port
502        //
503        // When adding a field: ensure it appears in exactly one category above.
504    }
505}
506
507/// Built-in profiles with sensible defaults for popular model families.
508pub fn builtin_profiles() -> Vec<Profile> {
509    vec![
510        Profile {
511            name: "Qwen".into(),
512            description: "Optimized for Qwen models (dense)".into(),
513            settings: ModelOverride {
514                context_length: Some(131072),
515                temperature: Some(0.7),
516                top_k: Some(20),
517                top_p: Some(0.95),
518                max_tokens: Some(4096),
519                presence_penalty: Some(0.0),
520                uniform_cache: Some(true),
521                jinja: Some(true),
522                ..Default::default()
523            },
524        },
525        Profile {
526            name: "Qwen-MoE".into(),
527            description: "Optimized for Qwen MoE models (35B-A3B)".into(),
528            settings: ModelOverride {
529                context_length: Some(131072),
530                temperature: Some(0.8),
531                top_k: Some(20),
532                top_p: Some(0.95),
533                max_tokens: Some(4096),
534                presence_penalty: Some(1.5),
535                uniform_cache: Some(true),
536                jinja: Some(true),
537                ..Default::default()
538            },
539        },
540        Profile {
541            name: "Qwen-Coding".into(),
542            description: "Optimized for Qwen models in coding mode".into(),
543            settings: ModelOverride {
544                context_length: Some(131072),
545                temperature: Some(0.6),
546                top_k: Some(20),
547                top_p: Some(0.95),
548                max_tokens: Some(4096),
549                presence_penalty: Some(0.0),
550                uniform_cache: Some(true),
551                jinja: Some(true),
552                ..Default::default()
553            },
554        },
555        Profile {
556            name: "Gemma".into(),
557            description: "Optimized for Gemma 2/4 models".into(),
558            settings: ModelOverride {
559                context_length: Some(131072),
560                min_p: Some(0.1),
561                temperature: Some(1.0),
562                top_k: Some(65),
563                top_p: Some(0.95),
564                max_tokens: Some(4096),
565                uniform_cache: Some(true),
566                jinja: Some(true),
567                ..Default::default()
568            },
569        },
570        Profile {
571            name: "Llama".into(),
572            description: "Optimized for Llama 3.1/3.3 models".into(),
573            settings: ModelOverride {
574                context_length: Some(131072),
575                temperature: Some(0.7),
576                top_p: Some(0.9),
577                repeat_penalty: Some(1.1),
578                max_tokens: Some(4096),
579                uniform_cache: Some(true),
580                jinja: Some(true),
581                ..Default::default()
582            },
583        },
584        Profile {
585            name: "Mistral".into(),
586            description: "Optimized for Mistral 7B/NeMo models".into(),
587            settings: ModelOverride {
588                context_length: Some(131072),
589                temperature: Some(0.7),
590                top_k: Some(50),
591                top_p: Some(0.9),
592                max_tokens: Some(4096),
593                uniform_cache: Some(true),
594                jinja: Some(true),
595                ..Default::default()
596            },
597        },
598        Profile {
599            name: "Phi".into(),
600            description: "Optimized for Phi 3.5 Mini models".into(),
601            settings: ModelOverride {
602                context_length: Some(131072),
603                temperature: Some(0.7),
604                top_k: Some(50),
605                top_p: Some(0.9),
606                repeat_penalty: Some(1.1),
607                max_tokens: Some(4096),
608                uniform_cache: Some(true),
609                ..Default::default()
610            },
611        },
612    ]
613}
614
615#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
616#[serde(default)]
617pub struct DefaultParams {
618    // Loading
619    #[serde(default)]
620    pub context_length: u32,
621    #[serde(default)]
622    pub threads: u32,
623    #[serde(default)]
624    pub threads_batch: u32,
625    #[serde(default)]
626    pub batch_size: u32,
627    #[serde(default)]
628    pub ubatch_size: u32,
629    #[serde(default = "default_cache_type_k")]
630    pub cache_type_k: Option<CacheTypeK>,
631    #[serde(default = "default_cache_type_v")]
632    pub cache_type_v: Option<CacheTypeV>,
633    #[serde(default)]
634    pub keep: i32,
635    #[serde(default)]
636    pub swa_full: bool,
637    #[serde(default)]
638    pub mlock: bool,
639    #[serde(default)]
640    pub mmap: bool,
641    #[serde(default)]
642    pub numa: NumMode,
643    #[serde(default)]
644    pub uniform_cache: bool,
645    #[serde(default)]
646    pub kv_cache_offload: bool,
647    #[serde(default)]
648    pub parallel: u32,
649    #[serde(default)]
650    pub max_concurrent_predictions: Option<u32>,
651    #[serde(default)]
652    pub system_prompt: String,
653    #[serde(default = "default_system_prompt_preset_name")]
654    pub system_prompt_preset_name: String,
655    // GPU
656    #[serde(default)]
657    pub gpu_layers: i32,
658    #[serde(default = "default_gpu_layers_mode")]
659    pub gpu_layers_mode: crate::models::GpuLayersMode,
660    #[serde(default)]
661    pub split_mode: SplitMode,
662    #[serde(default)]
663    pub tensor_split: String,
664    #[serde(default)]
665    pub main_gpu: i32,
666    #[serde(default)]
667    pub fit: bool,
668    #[serde(default)]
669    pub lora: Option<PathBuf>,
670    #[serde(default)]
671    pub lora_scaled: Option<(PathBuf, f32)>,
672    #[serde(default)]
673    pub rpc: String,
674    #[serde(default)]
675    pub embedding: bool,
676    #[serde(default)]
677    pub flash_attn: bool,
678    #[serde(default)]
679    pub jinja: bool,
680    #[serde(default)]
681    pub chat_template: Option<String>,
682    #[serde(default)]
683    pub chat_template_kwargs: Option<String>,
684    #[serde(default)]
685    pub expert_count: i32,
686
687    // Sampling
688    #[serde(default)]
689    pub seed: i32,
690    #[serde(default)]
691    pub temperature: f32,
692    #[serde(default)]
693    pub top_k: i32,
694    #[serde(default)]
695    pub top_p: f32,
696    #[serde(default)]
697    pub min_p: f32,
698    #[serde(default)]
699    pub typical_p: f32,
700    #[serde(default)]
701    pub mirostat: Mirostat,
702    #[serde(default)]
703    pub mirostat_lr: f32,
704    #[serde(default)]
705    pub mirostat_ent: f32,
706    #[serde(default)]
707    pub ignore_eos: bool,
708    #[serde(default)]
709    pub samplers: Samplers,
710
711    // Repetition
712    #[serde(default)]
713    pub repeat_penalty: f32,
714    #[serde(default)]
715    pub repeat_last_n: i32,
716    #[serde(default = "default_presence_penalty")]
717    pub presence_penalty: Option<f32>,
718    #[serde(default = "default_frequency_penalty")]
719    pub frequency_penalty: Option<f32>,
720    #[serde(default)]
721    pub dry_multiplier: f32,
722    #[serde(default)]
723    pub dry_base: f32,
724    #[serde(default)]
725    pub dry_allowed_length: i32,
726    #[serde(default)]
727    pub dry_penalty_last_n: i32,
728
729    // RoPE
730    #[serde(default)]
731    pub rope_scaling: RopeScaling,
732    #[serde(default)]
733    pub rope_scale: f32,
734    #[serde(default)]
735    pub rope_freq_base: f32,
736    #[serde(default)]
737    pub rope_freq_scale: f32,
738    #[serde(default)]
739    pub rope_yarn_enabled: bool,
740
741    // Server
742    #[serde(default)]
743    pub host: String,
744    #[serde(default)]
745    pub port: u16,
746    #[serde(default)]
747    pub timeout: u32,
748    #[serde(default = "default_cache_prompt")]
749    pub cache_prompt: bool,
750    #[serde(default)]
751    pub cache_reuse: u32,
752    #[serde(default)]
753    pub webui: bool,
754    #[serde(default)]
755    pub ws_server_enabled: bool,
756    #[serde(default = "default_ws_server_port")]
757    pub ws_server_port: u16,
758    #[serde(default)]
759    pub ws_server_auth_key: Option<String>,
760    #[serde(default = "default_ws_server_tls_enabled")]
761    pub ws_server_tls_enabled: bool,
762    #[serde(default)]
763    pub ws_server_tls_cert: Option<String>,
764    #[serde(default)]
765    pub ws_server_tls_key: Option<String>,
766    #[serde(default)]
767    pub router_max_models: u32,
768    #[serde(default)]
769    pub server_mode: crate::models::ServerMode,
770
771    // Other
772    #[serde(default = "default_max_tokens")]
773    pub max_tokens: Option<u32>,
774    #[serde(default)]
775    pub cache_type: CacheType,
776    #[serde(default)]
777    pub backend: Backend,
778    /// Platform override: "linux", "windows", or "macos". If None, auto-detected.
779    #[serde(default)]
780    pub platform: Option<String>,
781    #[serde(default)]
782    pub llama_cpp_version_cpu: Option<String>,
783    #[serde(default)]
784    pub llama_cpp_version_vulkan: Option<String>,
785    #[serde(default)]
786    pub llama_cpp_version_rocm: Option<String>,
787    #[serde(default)]
788    pub llama_cpp_version_rocm_lemonade: Option<String>,
789    #[serde(default)]
790    pub llama_cpp_version_cuda: Option<String>,
791
792    // API
793    #[serde(default)]
794    pub api_endpoint_enabled: bool,
795    #[serde(default = "default_api_endpoint_port")]
796    pub api_endpoint_port: u16,
797    #[serde(default)]
798    pub spec_type: String,
799    #[serde(default)]
800    pub draft_tokens: u32,
801    #[serde(default)]
802    pub tags: Vec<String>,
803}
804
/// Serde fallback for `DefaultParams::api_endpoint_port` when the field is missing.
fn default_api_endpoint_port() -> u16 {
    const API_ENDPOINT_PORT: u16 = 49222;
    API_ENDPOINT_PORT
}
808
/// Serde fallback for `DefaultParams::system_prompt_preset_name` when the field is missing.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
812
813fn default_cache_type_k() -> Option<CacheTypeK> {
814    None
815}
816fn default_cache_type_v() -> Option<CacheTypeV> {
817    None
818}
/// Serde fallback for `DefaultParams::presence_penalty`: unset.
fn default_presence_penalty() -> Option<f32> {
    Option::None
}
/// Serde fallback for `DefaultParams::frequency_penalty`: unset.
fn default_frequency_penalty() -> Option<f32> {
    Option::None
}
/// Serde fallback for `DefaultParams::max_tokens`: unset.
fn default_max_tokens() -> Option<u32> {
    Option::None
}
/// Serde fallback for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    const CACHE_PROMPT: bool = true;
    CACHE_PROMPT
}
/// Serde fallback for `DefaultParams::ws_server_port` when the field is missing.
fn default_ws_server_port() -> u16 {
    const WS_SERVER_PORT: u16 = 49223;
    WS_SERVER_PORT
}
834
/// Serde fallback for `DefaultParams::ws_server_tls_enabled`: TLS on.
fn default_ws_server_tls_enabled() -> bool {
    const TLS_ENABLED: bool = true;
    TLS_ENABLED
}
838
839fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
840    crate::models::GpuLayersMode::Auto
841}
842
843impl Default for DefaultParams {
844    fn default() -> Self {
845        Self {
846            // Loading
847            context_length: 131072,
848            threads: physical_cores(),
849            threads_batch: 8,
850            batch_size: 512,
851            ubatch_size: 512,
852            cache_type_k: None,
853            cache_type_v: None,
854            keep: 0,
855            swa_full: false,
856            mlock: false,
857            mmap: true,
858            numa: NumMode::None,
859            uniform_cache: true,
860            kv_cache_offload: true,
861            parallel: 1,
862            max_concurrent_predictions: None,
863            system_prompt: DEFAULT_SYSTEM_PROMPT.to_string(),
864            system_prompt_preset_name: "Coder".to_string(),
865
866            // GPU
867            gpu_layers: -1,
868            gpu_layers_mode: crate::models::GpuLayersMode::Auto,
869            split_mode: SplitMode::Layer,
870            tensor_split: String::new(),
871            main_gpu: 0,
872            fit: true,
873            lora: None,
874            lora_scaled: None,
875            rpc: String::new(),
876            embedding: false,
877            flash_attn: true,
878            jinja: true,
879            chat_template: None,
880            chat_template_kwargs: None,
881            expert_count: -1,
882
883            // Sampling
884            seed: -1,
885            temperature: 0.8,
886            top_k: 40,
887            top_p: 0.95,
888            min_p: 0.0,
889            typical_p: 1.0,
890            mirostat: Mirostat::Off,
891            mirostat_lr: 0.1,
892            mirostat_ent: 5.0,
893            ignore_eos: false,
894            samplers: Samplers::default(),
895
896            // Repetition
897            repeat_penalty: 1.1,
898            repeat_last_n: 64,
899            presence_penalty: None,
900            frequency_penalty: None,
901            dry_multiplier: 0.0,
902            dry_base: 1.75,
903            dry_allowed_length: 2,
904            dry_penalty_last_n: -1,
905
906            // RoPE
907            rope_scaling: RopeScaling::None,
908            rope_scale: 1.0,
909            rope_freq_base: 0.0,
910            rope_freq_scale: 1.0,
911            rope_yarn_enabled: false,
912
913            // Server
914            host: "127.0.0.1".to_string(),
915            port: 8080,
916            timeout: 600,
917            cache_prompt: true,
918            cache_reuse: 0,
919            webui: false,
920            ws_server_enabled: false,
921            ws_server_port: 49223,
922            ws_server_auth_key: None,
923            ws_server_tls_enabled: true,
924            ws_server_tls_cert: None,
925            ws_server_tls_key: None,
926            router_max_models: 4,
927            server_mode: crate::models::ServerMode::Normal,
928
929            // Other
930            max_tokens: None,
931            cache_type: CacheType::F16,
932            backend: {
933                use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
934                let vendors = detect_gpu_vendors();
935                let mut result = Backend::Cpu;
936                for v in &vendors {
937                    if matches!(v, GpuVendor::Nvidia) {
938                        result = Backend::Cuda;
939                        break;
940                    }
941                    if matches!(v, GpuVendor::Amd) {
942                        result = Backend::Rocm;
943                        break;
944                    }
945                    if matches!(v, GpuVendor::Intel) {
946                        result = Backend::Vulkan;
947                        break;
948                    }
949                }
950                result
951            },
952            platform: None,
953            llama_cpp_version_cpu: None,
954            llama_cpp_version_vulkan: None,
955            llama_cpp_version_rocm: None,
956            llama_cpp_version_rocm_lemonade: None,
957            llama_cpp_version_cuda: None,
958            api_endpoint_enabled: false,
959            api_endpoint_port: 49222,
960            spec_type: String::new(),
961            draft_tokens: 0,
962            tags: Vec::new(),
963        }
964    }
965}
966
967impl Default for Config {
968    fn default() -> Self {
969        Self {
970            models_dirs: vec![
971                dirs::data_dir()
972                    .unwrap_or_default()
973                    .join("llm-manager")
974                    .join("models"),
975            ],
976            llama_server: "llama-server".into(),
977            default: DefaultParams::default(),
978            model_overrides: ModelConfigStore::new(vec![]),
979            profiles: Default::default(),
980            system_prompt_presets: Default::default(),
981            rpc_workers: Vec::new(),
982            search_limit: default_search_limit(),
983            active_panel: ActivePanel::Models,
984            left_pct: 55,
985            language: default_language(),
986        }
987    }
988}
989
990impl Config {
991    pub fn config_path() -> PathBuf {
992        config_base_dir().join("llm-manager").join("config.yaml")
993    }
994
995    /// Validate config values and return a list of warnings for invalid entries.
996    pub fn validate(&self) -> Vec<String> {
997        let mut warnings = Vec::new();
998        let default = &self.default;
999
1000        // Numeric range checks
1001        if default.context_length < 512 || default.context_length > 131072 {
1002            warnings.push(format!(
1003                "context_length {} is outside recommended range 512-131072",
1004                default.context_length
1005            ));
1006        }
1007        if default.temperature < 0.0 || default.temperature > 2.0 {
1008            warnings.push(format!(
1009                "temperature {} is outside recommended range 0.0-2.0",
1010                default.temperature
1011            ));
1012        }
1013        if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
1014            warnings.push(format!(
1015                "top_p {} is outside recommended range 0.0-1.0",
1016                default.top_p
1017            ));
1018        }
1019        if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
1020            && default.repeat_penalty != 1.0
1021        {
1022            warnings.push(format!(
1023                "repeat_penalty {} is outside recommended range 0.0-3.0",
1024                default.repeat_penalty
1025            ));
1026        }
1027        if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
1028            warnings.push(format!(
1029                "mirostat_lr {} is outside recommended range 0.0-1.0",
1030                default.mirostat_lr
1031            ));
1032        }
1033        if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
1034            warnings.push(format!(
1035                "mirostat_ent {} is outside recommended range 0.0-10.0",
1036                default.mirostat_ent
1037            ));
1038        }
1039
1040        if default.timeout < 1 {
1041            warnings.push(format!(
1042                "timeout {} must be at least 1 second",
1043                default.timeout
1044            ));
1045        }
1046
1047        // Path validation
1048        if let Some(lora) = &default.lora
1049            && !lora.exists()
1050        {
1051            warnings.push(format!("lora path {} does not exist", lora.display()));
1052        }
1053        if let Some((lora, _)) = &default.lora_scaled
1054            && !lora.exists()
1055        {
1056            warnings.push(format!("lora path {} does not exist", lora.display()));
1057        }
1058
1059        // Model override validation
1060        for model_name in self.model_overrides.keys() {
1061            if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
1062                if let Some(lora) = &override_settings.lora
1063                    && !lora.exists()
1064                {
1065                    warnings.push(format!(
1066                        "model '{}' lora path {} does not exist",
1067                        model_name,
1068                        lora.display()
1069                    ));
1070                }
1071                if let Some((lora, _)) = &override_settings.lora_scaled
1072                    && !lora.exists()
1073                {
1074                    warnings.push(format!(
1075                        "model '{}' lora path {} does not exist",
1076                        model_name,
1077                        lora.display()
1078                    ));
1079                }
1080            }
1081        }
1082
1083        warnings
1084    }
1085
1086    /// Resolve settings for a specific model and profile.
1087    pub fn resolve_settings(
1088        &self,
1089        model_name: Option<&str>,
1090        profile_name: Option<&str>,
1091    ) -> crate::models::ModelSettings {
1092        let mut settings = crate::models::ModelSettings::from_config(self);
1093
1094        // Apply model-specific override
1095        if let Some(name) = model_name
1096            && let Some(override_settings) = self.model_overrides.get(name)
1097        {
1098            override_settings.apply(&mut settings);
1099        }
1100
1101        // Apply profile override if specified
1102        if let Some(p_name) = profile_name {
1103            if let Some(profile) = self.profiles.get(p_name) {
1104                profile.settings.apply(&mut settings);
1105            } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1106                profile.settings.apply(&mut settings);
1107            }
1108        }
1109
1110        settings
1111    }
1112
1113    /// Get a system prompt preset content by name.
1114    pub fn get_preset_content(&self, name: &str) -> Option<String> {
1115        self.system_prompt_presets
1116            .get(name)
1117            .map(|p| p.content.clone())
1118    }
1119
1120    fn normalize_config(mut config: Config) -> Config {
1121        // normalize models_dirs
1122        for path in &mut config.models_dirs {
1123            let path_str = path.to_string_lossy();
1124            if let Some(stripped) = path_str.strip_prefix("~/") {
1125                let home = dirs::home_dir().unwrap_or_default();
1126                *path = home.join(stripped);
1127            } else if !path.is_absolute() {
1128                let home = dirs::home_dir().unwrap_or_default();
1129                *path = home.join(path_str.as_ref());
1130            }
1131        }
1132
1133        // Merge built-in profiles into in-memory cache (do not persist to disk)
1134        for p in builtin_profiles() {
1135            if config.profiles.get(&p.name).is_none() {
1136                config.profiles.insert_builtin(p);
1137            }
1138        }
1139
1140        // Merge built-in system prompt presets into in-memory cache (do not persist to disk)
1141        for p in builtin_system_prompt_presets() {
1142            if config.system_prompt_presets.get(&p.name).is_none() {
1143                config.system_prompt_presets.insert_builtin(p);
1144            }
1145        }
1146        config
1147    }
1148
1149    fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1150        let content = std::fs::read_to_string(path)?;
1151        let config: Config = serde_yaml::from_str(&content)
1152            .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1153        let config = Self::normalize_config(config);
1154        let config = config.auto_detect_platform();
1155        let warnings = config.validate();
1156        if !warnings.is_empty() {
1157            eprintln!("Config validation warnings:");
1158            for warning in &warnings {
1159                eprintln!("  - {}", warning);
1160            }
1161        }
1162        Ok(config)
1163    }
1164
1165    pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1166        let path = Self::config_path();
1167        if path.exists() {
1168            Self::load_impl(&path)
1169        } else {
1170            let mut config = Config::default();
1171            config.save()?;
1172            Ok(config)
1173        }
1174    }
1175
1176    pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1177        if path.exists() {
1178            Self::load_impl(&path)
1179        } else {
1180            Err(format!("Config file not found: {}", path.display()).into())
1181        }
1182    }
1183
1184    /// Auto-detect the platform if not explicitly set in config.
1185    fn auto_detect_platform(mut self) -> Self {
1186        if self.default.platform.is_none() {
1187            self.default.platform =
1188                Some(
1189                    crate::backend::hardware::platform_name(
1190                        crate::backend::hardware::detect_platform(),
1191                    )
1192                    .to_string(),
1193                );
1194        }
1195        self
1196    }
1197
1198    pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1199        let path = Self::config_path();
1200        if let Some(parent) = path.parent() {
1201            std::fs::create_dir_all(parent)?;
1202        }
1203        let content = serde_yaml::to_string(self)?;
1204        std::fs::write(&path, content)?;
1205        // Persist model configs to individual YAML files
1206        let entries: Vec<(String, ModelOverride)> = self
1207            .model_overrides
1208            .keys()
1209            .iter()
1210            .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1211            .collect();
1212        for (name, cfg) in entries {
1213            self.model_overrides.save(&name, &cfg);
1214        }
1215        // Persist user profiles to individual YAML files (skip built-ins)
1216        for profile in self.profiles.user_profiles() {
1217            self.profiles.save(&profile);
1218        }
1219        // Persist user presets to individual YAML files (skip built-ins)
1220        for preset in self.system_prompt_presets.user_presets() {
1221            self.system_prompt_presets.save(&preset);
1222        }
1223        Ok(())
1224    }
1225
1226    pub fn merged_profiles(&self) -> Vec<Profile> {
1227        self.profiles.all()
1228    }
1229
1230    pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
1231        self.system_prompt_presets.all()
1232    }
1233}
1234
/// Severity attached to a [`LogEntry`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    /// Informational message.
    Info,
    /// Something unexpected that did not stop the operation.
    Warning,
    /// A failure.
    Error,
}

impl LogLevel {
    /// Upper-case text label for this level (`"INFO"`, `"WARNING"` or `"ERROR"`).
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Info => "INFO",
            Self::Warning => "WARNING",
            Self::Error => "ERROR",
        }
    }
}
1251
1252#[derive(Debug, Clone)]
1253pub struct LogEntry {
1254    pub timestamp: String,
1255    pub level: LogLevel,
1256    pub message: String,
1257}
1258
1259impl LogEntry {
1260    pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1261        let timestamp = Local::now().format("%H:%M:%S").to_string();
1262        let message = sanitize_log(&message.into());
1263        Self {
1264            timestamp,
1265            level,
1266            message,
1267        }
1268    }
1269}
1270
/// Sanitize log messages to prevent TUI layout breakages.
///
/// * Only the first 2000 characters of `input` are considered.
/// * Control characters are dropped, except `\n` (kept) and `\t` (expanded to four
///   spaces). Dropping `\r` in particular matters because it breaks TUI rendering.
/// * Trailing whitespace is trimmed.
/// * If the input was longer than the limit, `"... (truncated)"` is appended.
///
/// The input is processed in a single bounded pass: at most one character past the
/// limit is inspected, so the cost does not grow with the size of an oversized line.
fn sanitize_log(input: &str) -> String {
    // Limit length to avoid layout/perf issues with massive lines
    const MAX_CHARS: usize = 2000;
    const TAB_EXPANSION: &str = "    ";

    let mut chars = input.chars();
    let mut output = String::with_capacity(input.len().min(MAX_CHARS));
    for c in chars.by_ref().take(MAX_CHARS) {
        match c {
            // Replace tabs with spaces for consistent rendering
            '\t' => output.push_str(TAB_EXPANSION),
            '\n' => output.push(c),
            // Every other control character (including `\r` and ESC) is discarded.
            c if c.is_control() => {}
            c => output.push(c),
        }
    }
    // `take` consumed exactly the first MAX_CHARS characters; anything left over means
    // the input exceeded the limit.
    let truncated = chars.next().is_some();

    // Final trim to remove trailing junk (done in place, no second allocation)
    let kept = output.trim_end().len();
    output.truncate(kept);
    if truncated {
        output.push_str("... (truncated)");
    }
    output
}
llm_manager/config.rs

llm_manager/
config.rs