1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12pub use model_config::ModelConfigStore;
13
14pub use profiles::ProfileStore;
15
16use crate::models::{
17 Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
18};
19use crate::tui::app::ActivePanel;
20pub use presets::PresetStore;
21
22pub fn config_base_dir() -> PathBuf {
27 if let Some(d) = dirs::config_dir() {
28 return d;
29 }
30 if let Some(home) = dirs::home_dir() {
31 return home.join(".config");
32 }
33 PathBuf::from(".").join(".llm-manager")
34}
35
/// Returns the number of physical CPU cores, never less than 1.
///
/// The count is the number of distinct `(physical id, core id)` pairs found in
/// `/proc/cpuinfo`. When that file cannot be read, or when it contains no such
/// pairs (the keys are not present on every architecture / virtual machine),
/// the logical CPU count from `std::thread::available_parallelism` is used
/// instead, falling back to 1 if even that is unavailable.
pub fn physical_cores() -> u32 {
    let logical_cpus = || {
        std::thread::available_parallelism()
            .map(|p| u32::try_from(p.get()).unwrap_or(u32::MAX))
            .unwrap_or(1)
    };

    let Ok(content) = std::fs::read_to_string("/proc/cpuinfo") else {
        return logical_cpus();
    };

    match count_physical_cores(&content) {
        // No topology information: reporting 0 cores would yield a thread count of 0.
        0 => logical_cpus(),
        n => u32::try_from(n).unwrap_or(u32::MAX),
    }
}

/// Counts distinct `(physical id, core id)` pairs in `/proc/cpuinfo`-style text.
fn count_physical_cores(cpuinfo: &str) -> usize {
    let mut seen = HashSet::new();
    let mut cur_phys: Option<&str> = None;
    let mut cur_core: Option<&str> = None;

    for line in cpuinfo.lines() {
        if line.trim().is_empty() {
            // A blank line ends one processor stanza; forget its ids so they
            // cannot be paired with values from the next stanza.
            cur_phys = None;
            cur_core = None;
            continue;
        }
        let Some((key, val)) = line.split_once(':') else {
            continue;
        };
        match key.trim() {
            "physical id" => cur_phys = Some(val.trim()),
            "core id" => cur_core = Some(val.trim()),
            _ => continue,
        }
        if let (Some(phys), Some(core)) = (cur_phys, cur_core) {
            seen.insert((phys, core));
        }
    }

    seen.len()
}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct RpcWorker {
70 #[serde(default)]
71 pub selected: bool,
72 #[serde(default)]
73 pub name: String,
74 pub ip: String,
75 #[serde(default = "default_rpc_port")]
76 pub port: u16,
77}
78
/// Serde fallback for `RpcWorker::port` when the key is absent.
fn default_rpc_port() -> u16 {
    const DEFAULT_RPC_PORT: u16 = 50052;
    DEFAULT_RPC_PORT
}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
85pub struct Config {
86 pub models_dirs: Vec<PathBuf>,
87 pub llama_server: PathBuf,
88 pub default: DefaultParams,
89 #[serde(default, skip)]
91 pub model_overrides: ModelConfigStore,
92 #[serde(default, skip)]
94 pub profiles: ProfileStore,
95 #[serde(default, skip)]
97 pub system_prompt_presets: PresetStore,
98 #[serde(default)]
100 pub rpc_workers: Vec<RpcWorker>,
101 #[serde(default = "default_search_limit")]
103 pub search_limit: u32,
104 #[serde(default)]
106 pub active_panel: crate::tui::app::ActivePanel,
107 #[serde(default = "default_left_pct")]
109 pub left_pct: u16,
110}
111
/// Serde fallback for `Config::left_pct` when the key is absent.
fn default_left_pct() -> u16 {
    const DEFAULT_LEFT_PCT: u16 = 55;
    DEFAULT_LEFT_PCT
}
115
/// Serde fallback for `Config::search_limit` when the key is absent.
fn default_search_limit() -> u32 {
    const DEFAULT_SEARCH_LIMIT: u32 = 50;
    DEFAULT_SEARCH_LIMIT
}
119
120#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
122pub struct Profile {
123 pub name: String,
124 pub description: String,
126 #[serde(default)]
128 pub settings: ModelOverride,
129}
130
131impl Profile {
132 pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
134 self.settings.apply(&mut base);
135 base
136 }
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
141pub struct SystemPromptPreset {
142 pub name: String,
143 pub description: String,
144 pub content: String,
145}
146
147pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
149 vec![
150 SystemPromptPreset {
151 name: "General".into(),
152 description: "General-purpose assistant".into(),
153 content: "You are a helpful assistant.".into(),
154 },
155 SystemPromptPreset {
156 name: "Coder".into(),
157 description: "Expert software developer".into(),
158 content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
159 },
160 SystemPromptPreset {
161 name: "Thinker".into(),
162 description: "Analytical and thoughtful".into(),
163 content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
164 },
165 SystemPromptPreset {
166 name: "Mathematician".into(),
167 description: "Expert in mathematics".into(),
168 content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
169 },
170 ]
171}
172
173#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
174pub struct ModelOverride {
175 pub context_length: Option<u32>,
177 pub batch_size: Option<u32>,
178 pub ubatch_size: Option<u32>,
179 pub cache_type_k: Option<CacheTypeK>,
180 pub cache_type_v: Option<CacheTypeV>,
181 pub keep: Option<i32>,
182 pub swa_full: Option<bool>,
183 pub mlock: Option<bool>,
184 pub mmap: Option<bool>,
185 pub numa: Option<NumMode>,
186 pub uniform_cache: Option<bool>,
187 pub system_prompt: Option<String>,
188 pub system_prompt_preset_name: Option<String>,
189 pub max_concurrent_predictions: Option<u32>,
190 pub threads: Option<u32>,
191 pub threads_batch: Option<u32>,
192 pub parallel: Option<u32>,
193
194 pub gpu_layers: Option<i32>,
196 pub split_mode: Option<SplitMode>,
197 pub tensor_split: Option<String>,
198 pub main_gpu: Option<i32>,
199 pub fit: Option<bool>,
200 pub lora: Option<PathBuf>,
201 pub lora_scaled: Option<(PathBuf, f32)>,
202 pub rpc: Option<String>,
203 pub embedding: Option<bool>,
204 pub kv_cache_offload: Option<bool>,
205 pub flash_attn: Option<bool>,
206 pub jinja: Option<bool>,
207 pub chat_template: Option<String>,
208 pub chat_template_kwargs: Option<String>,
209 pub expert_count: Option<i32>,
210 pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,
211
212 pub seed: Option<i32>,
214 pub temperature: Option<f32>,
215 pub top_k: Option<i32>,
216 pub top_p: Option<f32>,
217 pub min_p: Option<f32>,
218 pub typical_p: Option<f32>,
219 pub mirostat: Option<Mirostat>,
220 pub mirostat_lr: Option<f32>,
221 pub mirostat_ent: Option<f32>,
222 pub ignore_eos: Option<bool>,
223 pub samplers: Option<Samplers>,
224
225 pub repeat_penalty: Option<f32>,
227 pub repeat_last_n: Option<i32>,
228 pub presence_penalty: Option<f32>,
229 pub frequency_penalty: Option<f32>,
230 pub dry_multiplier: Option<f32>,
231 pub dry_base: Option<f32>,
232 pub dry_allowed_length: Option<i32>,
233 pub dry_penalty_last_n: Option<i32>,
234
235 pub rope_scaling: Option<RopeScaling>,
237 pub rope_scale: Option<f32>,
238 pub rope_freq_base: Option<f32>,
239 pub rope_freq_scale: Option<f32>,
240 pub rope_yarn_enabled: Option<bool>,
241
242 pub cache_prompt: Option<bool>,
244 pub cache_reuse: Option<u32>,
245 pub webui: Option<bool>,
246
247 pub max_tokens: Option<u32>,
249 pub cache_type: Option<CacheType>,
250 pub llama_cpp_version_cpu: Option<String>,
251 pub llama_cpp_version_vulkan: Option<String>,
252 pub llama_cpp_version_rocm: Option<String>,
253 pub llama_cpp_version_rocm_lemonade: Option<String>,
254 pub llama_cpp_version_cuda: Option<String>,
255 pub spec_type: Option<String>,
256 pub draft_tokens: Option<u32>,
257 pub tags: Option<Vec<String>>,
258}
259
/// For each listed field: when `$self.<field>` is `Some(v)`, stores `v` in
/// `$base.<field>`; otherwise leaves `$base.<field>` as it was.
///
/// Intended for `Copy` fields: the value is taken out of the option by copy.
macro_rules! apply_scalar {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field {
                $base.$field = value;
            }
        )+
    };
}
268
/// For each listed field: when `$self.<field>` is `Some(v)`, stores a clone of
/// `v` in `$base.<field>`; otherwise leaves `$base.<field>` as it was.
///
/// Used for non-`Copy` fields whose target is not itself an `Option`.
macro_rules! apply_clone {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field.as_ref() {
                $base.$field = value.clone();
            }
        )+
    };
}
279
/// For each listed field: when `$self.<field>` is `Some`, replaces
/// `$base.<field>` with a clone of it; a `None` leaves `$base.<field>` as it was.
///
/// Used for fields that are `Option`s on both sides.
macro_rules! apply_option {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if $self.$field.is_some() {
                $base.$field = $self.$field.clone();
            }
        )+
    };
}
290
291impl ModelOverride {
292 pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
293 Self {
294 context_length: Some(s.context_length),
295 batch_size: Some(s.batch_size),
296 ubatch_size: Some(s.ubatch_size),
297 cache_type_k: s.cache_type_k,
298 cache_type_v: s.cache_type_v,
299 keep: Some(s.keep),
300 swa_full: Some(s.swa_full),
301 mlock: Some(s.mlock),
302 mmap: Some(s.mmap),
303 numa: Some(s.numa),
304 uniform_cache: Some(s.uniform_cache),
305 system_prompt: Some(s.system_prompt.clone()),
306 system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
307 max_concurrent_predictions: s.max_concurrent_predictions,
308 threads: Some(s.threads),
309 threads_batch: Some(s.threads_batch),
310 parallel: Some(s.parallel),
311 gpu_layers: Some(match s.gpu_layers_mode {
312 crate::models::GpuLayersMode::Auto => 0,
313 crate::models::GpuLayersMode::Specific(n) => n as i32,
314 crate::models::GpuLayersMode::All => -1,
315 }),
316 gpu_layers_mode: Some(s.gpu_layers_mode),
317 split_mode: Some(s.split_mode),
318 tensor_split: Some(s.tensor_split.clone()),
319 main_gpu: Some(s.main_gpu),
320 fit: Some(s.fit),
321 lora: s.lora.clone(),
322 lora_scaled: s.lora_scaled.clone(),
323 rpc: Some(s.rpc.clone()),
324 embedding: Some(s.embedding),
325 kv_cache_offload: Some(s.kv_cache_offload),
326 flash_attn: Some(s.flash_attn),
327 jinja: Some(s.jinja),
328 chat_template: s.chat_template.clone(),
329 chat_template_kwargs: s.chat_template_kwargs.clone(),
330 expert_count: Some(s.expert_count),
331 seed: Some(s.seed),
332 temperature: Some(s.temperature),
333 top_k: Some(s.top_k),
334 top_p: Some(s.top_p),
335 min_p: Some(s.min_p),
336 typical_p: Some(s.typical_p),
337 mirostat: Some(s.mirostat),
338 mirostat_lr: Some(s.mirostat_lr),
339 mirostat_ent: Some(s.mirostat_ent),
340 ignore_eos: Some(s.ignore_eos),
341 samplers: Some(s.samplers.clone()),
342 repeat_penalty: Some(s.repeat_penalty),
343 repeat_last_n: Some(s.repeat_last_n),
344 presence_penalty: s.presence_penalty,
345 frequency_penalty: s.frequency_penalty,
346 dry_multiplier: Some(s.dry_multiplier),
347 dry_base: Some(s.dry_base),
348 dry_allowed_length: Some(s.dry_allowed_length),
349 dry_penalty_last_n: Some(s.dry_penalty_last_n),
350 rope_scaling: Some(s.rope_scaling),
351 rope_scale: Some(s.rope_scale),
352 rope_freq_base: Some(s.rope_freq_base),
353 rope_freq_scale: Some(s.rope_freq_scale),
354 rope_yarn_enabled: Some(s.rope_yarn_enabled),
355 cache_prompt: Some(s.cache_prompt),
356 cache_reuse: Some(s.cache_reuse),
357 webui: Some(s.webui),
358 max_tokens: s.max_tokens,
359 cache_type: Some(s.cache_type),
360 llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
361 llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
362 llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
363 llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
364 llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
365 spec_type: Some(s.spec_type.clone()),
366 draft_tokens: Some(s.draft_tokens),
367 tags: Some(s.tags.clone()),
368 }
369 }
370
371 pub fn apply(&self, base: &mut crate::models::ModelSettings) {
373 apply_scalar!(self, base,
378 context_length, batch_size, ubatch_size, keep, swa_full, mlock, mmap,
379 numa, uniform_cache, kv_cache_offload, threads, threads_batch, parallel,
380 split_mode, main_gpu, fit, embedding, flash_attn, jinja, expert_count,
381 seed, temperature, top_k, top_p, min_p, typical_p,
382 mirostat, mirostat_lr, mirostat_ent, ignore_eos,
383 repeat_penalty, repeat_last_n,
384 dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
385 rope_scaling, rope_scale, rope_freq_base, rope_freq_scale, rope_yarn_enabled,
386 cache_prompt, cache_reuse, webui, cache_type,
387 draft_tokens, gpu_layers_mode,
388 );
389
390 apply_clone!(self, base,
392 system_prompt, system_prompt_preset_name, tensor_split, rpc,
393 samplers, spec_type, tags,
394 );
395
396 apply_option!(self, base,
398 lora, lora_scaled, chat_template, chat_template_kwargs,
399 llama_cpp_version_cpu, llama_cpp_version_vulkan,
400 llama_cpp_version_rocm, llama_cpp_version_rocm_lemonade,
401 llama_cpp_version_cuda,
402 );
403
404 base.cache_type_k = self.cache_type_k;
406 base.cache_type_v = self.cache_type_v;
407 base.presence_penalty = self.presence_penalty;
408 base.frequency_penalty = self.frequency_penalty;
409 base.max_tokens = self.max_tokens;
410
411 base.max_concurrent_predictions = self
413 .max_concurrent_predictions
414 .or(base.max_concurrent_predictions);
415
416 if let Some(n) = self.gpu_layers {
419 base.gpu_layers_mode = match n {
420 n if n < 0 => crate::models::GpuLayersMode::All,
421 n => crate::models::GpuLayersMode::Specific(n as u32),
422 };
423 }
424
425 }
441}
442
443pub fn builtin_profiles() -> Vec<Profile> {
445 vec![
446 Profile {
447 name: "Qwen".into(),
448 description: "Optimized for Qwen models (dense)".into(),
449 settings: ModelOverride {
450 context_length: Some(131072),
451 temperature: Some(0.7),
452 top_k: Some(20),
453 top_p: Some(0.95),
454 max_tokens: Some(4096),
455 presence_penalty: Some(0.0),
456 uniform_cache: Some(true),
457 jinja: Some(true),
458 ..Default::default()
459 },
460 },
461 Profile {
462 name: "Qwen-MoE".into(),
463 description: "Optimized for Qwen MoE models (35B-A3B)".into(),
464 settings: ModelOverride {
465 context_length: Some(131072),
466 temperature: Some(0.8),
467 top_k: Some(20),
468 top_p: Some(0.95),
469 max_tokens: Some(4096),
470 presence_penalty: Some(1.5),
471 uniform_cache: Some(true),
472 jinja: Some(true),
473 ..Default::default()
474 },
475 },
476 Profile {
477 name: "Qwen-Coding".into(),
478 description: "Optimized for Qwen models in coding mode".into(),
479 settings: ModelOverride {
480 context_length: Some(131072),
481 temperature: Some(0.6),
482 top_k: Some(20),
483 top_p: Some(0.95),
484 max_tokens: Some(4096),
485 presence_penalty: Some(0.0),
486 uniform_cache: Some(true),
487 jinja: Some(true),
488 ..Default::default()
489 },
490 },
491 Profile {
492 name: "Gemma".into(),
493 description: "Optimized for Gemma 2/4 models".into(),
494 settings: ModelOverride {
495 context_length: Some(131072),
496 min_p: Some(0.1),
497 temperature: Some(1.0),
498 top_k: Some(65),
499 top_p: Some(0.95),
500 max_tokens: Some(4096),
501 uniform_cache: Some(true),
502 jinja: Some(true),
503 ..Default::default()
504 },
505 },
506 Profile {
507 name: "Llama".into(),
508 description: "Optimized for Llama 3.1/3.3 models".into(),
509 settings: ModelOverride {
510 context_length: Some(131072),
511 temperature: Some(0.7),
512 top_p: Some(0.9),
513 repeat_penalty: Some(1.1),
514 max_tokens: Some(4096),
515 uniform_cache: Some(true),
516 jinja: Some(true),
517 ..Default::default()
518 },
519 },
520 Profile {
521 name: "Mistral".into(),
522 description: "Optimized for Mistral 7B/NeMo models".into(),
523 settings: ModelOverride {
524 context_length: Some(131072),
525 temperature: Some(0.7),
526 top_k: Some(50),
527 top_p: Some(0.9),
528 max_tokens: Some(4096),
529 uniform_cache: Some(true),
530 jinja: Some(true),
531 ..Default::default()
532 },
533 },
534 Profile {
535 name: "Phi".into(),
536 description: "Optimized for Phi 3.5 Mini models".into(),
537 settings: ModelOverride {
538 context_length: Some(131072),
539 temperature: Some(0.7),
540 top_k: Some(50),
541 top_p: Some(0.9),
542 repeat_penalty: Some(1.1),
543 max_tokens: Some(4096),
544 uniform_cache: Some(true),
545 ..Default::default()
546 },
547 },
548 ]
549}
550
551#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
552#[serde(default)]
553pub struct DefaultParams {
554 #[serde(default)]
556 pub context_length: u32,
557 #[serde(default)]
558 pub threads: u32,
559 #[serde(default)]
560 pub threads_batch: u32,
561 #[serde(default)]
562 pub batch_size: u32,
563 #[serde(default)]
564 pub ubatch_size: u32,
565 #[serde(default = "default_cache_type_k")]
566 pub cache_type_k: Option<CacheTypeK>,
567 #[serde(default = "default_cache_type_v")]
568 pub cache_type_v: Option<CacheTypeV>,
569 #[serde(default)]
570 pub keep: i32,
571 #[serde(default)]
572 pub swa_full: bool,
573 #[serde(default)]
574 pub mlock: bool,
575 #[serde(default)]
576 pub mmap: bool,
577 #[serde(default)]
578 pub numa: NumMode,
579 #[serde(default)]
580 pub uniform_cache: bool,
581 #[serde(default)]
582 pub kv_cache_offload: bool,
583 #[serde(default)]
584 pub parallel: u32,
585 #[serde(default)]
586 pub max_concurrent_predictions: Option<u32>,
587 #[serde(default)]
588 pub system_prompt: String,
589 #[serde(default = "default_system_prompt_preset_name")]
590 pub system_prompt_preset_name: String,
591 #[serde(default)]
593 pub gpu_layers: i32,
594 #[serde(default = "default_gpu_layers_mode")]
595 pub gpu_layers_mode: crate::models::GpuLayersMode,
596 #[serde(default)]
597 pub split_mode: SplitMode,
598 #[serde(default)]
599 pub tensor_split: String,
600 #[serde(default)]
601 pub main_gpu: i32,
602 #[serde(default)]
603 pub fit: bool,
604 #[serde(default)]
605 pub lora: Option<PathBuf>,
606 #[serde(default)]
607 pub lora_scaled: Option<(PathBuf, f32)>,
608 #[serde(default)]
609 pub rpc: String,
610 #[serde(default)]
611 pub embedding: bool,
612 #[serde(default)]
613 pub flash_attn: bool,
614 #[serde(default)]
615 pub jinja: bool,
616 #[serde(default)]
617 pub chat_template: Option<String>,
618 #[serde(default)]
619 pub chat_template_kwargs: Option<String>,
620 #[serde(default)]
621 pub expert_count: i32,
622
623 #[serde(default)]
625 pub seed: i32,
626 #[serde(default)]
627 pub temperature: f32,
628 #[serde(default)]
629 pub top_k: i32,
630 #[serde(default)]
631 pub top_p: f32,
632 #[serde(default)]
633 pub min_p: f32,
634 #[serde(default)]
635 pub typical_p: f32,
636 #[serde(default)]
637 pub mirostat: Mirostat,
638 #[serde(default)]
639 pub mirostat_lr: f32,
640 #[serde(default)]
641 pub mirostat_ent: f32,
642 #[serde(default)]
643 pub ignore_eos: bool,
644 #[serde(default)]
645 pub samplers: Samplers,
646
647 #[serde(default)]
649 pub repeat_penalty: f32,
650 #[serde(default)]
651 pub repeat_last_n: i32,
652 #[serde(default = "default_presence_penalty")]
653 pub presence_penalty: Option<f32>,
654 #[serde(default = "default_frequency_penalty")]
655 pub frequency_penalty: Option<f32>,
656 #[serde(default)]
657 pub dry_multiplier: f32,
658 #[serde(default)]
659 pub dry_base: f32,
660 #[serde(default)]
661 pub dry_allowed_length: i32,
662 #[serde(default)]
663 pub dry_penalty_last_n: i32,
664
665 #[serde(default)]
667 pub rope_scaling: RopeScaling,
668 #[serde(default)]
669 pub rope_scale: f32,
670 #[serde(default)]
671 pub rope_freq_base: f32,
672 #[serde(default)]
673 pub rope_freq_scale: f32,
674 #[serde(default)]
675 pub rope_yarn_enabled: bool,
676
677 #[serde(default)]
679 pub host: String,
680 #[serde(default)]
681 pub port: u16,
682 #[serde(default)]
683 pub timeout: u32,
684 #[serde(default = "default_cache_prompt")]
685 pub cache_prompt: bool,
686 #[serde(default)]
687 pub cache_reuse: u32,
688 #[serde(default)]
689 pub webui: bool,
690 #[serde(default)]
691 pub ws_server_enabled: bool,
692 #[serde(default = "default_ws_server_port")]
693 pub ws_server_port: u16,
694 #[serde(default)]
695 pub ws_server_auth_key: Option<String>,
696 #[serde(default = "default_ws_server_tls_enabled")]
697 pub ws_server_tls_enabled: bool,
698 #[serde(default)]
699 pub ws_server_tls_cert: Option<String>,
700 #[serde(default)]
701 pub ws_server_tls_key: Option<String>,
702 #[serde(default)]
703 pub router_max_models: u32,
704 #[serde(default)]
705 pub server_mode: crate::models::ServerMode,
706
707 #[serde(default = "default_max_tokens")]
709 pub max_tokens: Option<u32>,
710 #[serde(default)]
711 pub cache_type: CacheType,
712 #[serde(default)]
713 pub backend: Backend,
714 #[serde(default)]
716 pub platform: Option<String>,
717 #[serde(default)]
718 pub llama_cpp_version_cpu: Option<String>,
719 #[serde(default)]
720 pub llama_cpp_version_vulkan: Option<String>,
721 #[serde(default)]
722 pub llama_cpp_version_rocm: Option<String>,
723 #[serde(default)]
724 pub llama_cpp_version_rocm_lemonade: Option<String>,
725 #[serde(default)]
726 pub llama_cpp_version_cuda: Option<String>,
727
728 #[serde(default)]
730 pub api_endpoint_enabled: bool,
731 #[serde(default = "default_api_endpoint_port")]
732 pub api_endpoint_port: u16,
733 #[serde(default)]
734 pub spec_type: String,
735 #[serde(default)]
736 pub draft_tokens: u32,
737 #[serde(default)]
738 pub tags: Vec<String>,
739}
740
/// Serde fallback for `DefaultParams::api_endpoint_port` when the key is absent.
fn default_api_endpoint_port() -> u16 {
    const DEFAULT_API_ENDPOINT_PORT: u16 = 49222;
    DEFAULT_API_ENDPOINT_PORT
}
744
/// Serde fallback for `DefaultParams::system_prompt_preset_name` when the key is absent.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
748
749fn default_cache_type_k() -> Option<CacheTypeK> {
750 None
751}
752fn default_cache_type_v() -> Option<CacheTypeV> {
753 None
754}
/// Serde fallback for `DefaultParams::presence_penalty`: unset.
fn default_presence_penalty() -> Option<f32> {
    Option::<f32>::None
}
/// Serde fallback for `DefaultParams::frequency_penalty`: unset.
fn default_frequency_penalty() -> Option<f32> {
    Option::<f32>::None
}
/// Serde fallback for `DefaultParams::max_tokens`: unset.
fn default_max_tokens() -> Option<u32> {
    Option::<u32>::None
}
/// Serde fallback for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    const CACHE_PROMPT_BY_DEFAULT: bool = true;
    CACHE_PROMPT_BY_DEFAULT
}
/// Serde fallback for `DefaultParams::ws_server_port` when the key is absent.
fn default_ws_server_port() -> u16 {
    const DEFAULT_WS_SERVER_PORT: u16 = 49223;
    DEFAULT_WS_SERVER_PORT
}
/// Serde fallback for `DefaultParams::ws_server_tls_enabled`: TLS on.
fn default_ws_server_tls_enabled() -> bool {
    const TLS_ENABLED_BY_DEFAULT: bool = true;
    TLS_ENABLED_BY_DEFAULT
}
773 fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
774 crate::models::GpuLayersMode::Auto
775}
776
777impl Default for DefaultParams {
778 fn default() -> Self {
779 Self {
780 context_length: 131072,
782 threads: physical_cores(),
783 threads_batch: 8,
784 batch_size: 512,
785 ubatch_size: 512,
786 cache_type_k: None,
787 cache_type_v: None,
788 keep: 0,
789 swa_full: false,
790 mlock: false,
791 mmap: true,
792 numa: NumMode::None,
793 uniform_cache: true,
794 kv_cache_offload: true,
795 parallel: 1,
796 max_concurrent_predictions: None,
797 system_prompt: "You are a helpful assistant.".to_string(),
798 system_prompt_preset_name: "General".to_string(),
799
800 gpu_layers: -1,
802 gpu_layers_mode: crate::models::GpuLayersMode::Auto,
803 split_mode: SplitMode::Layer,
804 tensor_split: String::new(),
805 main_gpu: 0,
806 fit: true,
807 lora: None,
808 lora_scaled: None,
809 rpc: String::new(),
810 embedding: false,
811 flash_attn: true,
812 jinja: true,
813 chat_template: None,
814 chat_template_kwargs: None,
815 expert_count: -1,
816
817 seed: -1,
819 temperature: 0.8,
820 top_k: 40,
821 top_p: 0.95,
822 min_p: 0.0,
823 typical_p: 1.0,
824 mirostat: Mirostat::Off,
825 mirostat_lr: 0.1,
826 mirostat_ent: 5.0,
827 ignore_eos: false,
828 samplers: Samplers::default(),
829
830 repeat_penalty: 1.1,
832 repeat_last_n: 64,
833 presence_penalty: None,
834 frequency_penalty: None,
835 dry_multiplier: 0.0,
836 dry_base: 1.75,
837 dry_allowed_length: 2,
838 dry_penalty_last_n: -1,
839
840 rope_scaling: RopeScaling::None,
842 rope_scale: 1.0,
843 rope_freq_base: 0.0,
844 rope_freq_scale: 1.0,
845 rope_yarn_enabled: false,
846
847 host: "127.0.0.1".to_string(),
849 port: 8080,
850 timeout: 600,
851 cache_prompt: true,
852 cache_reuse: 0,
853 webui: false,
854 ws_server_enabled: false,
855 ws_server_port: 49223,
856 ws_server_auth_key: None,
857 ws_server_tls_enabled: true,
858 ws_server_tls_cert: None,
859 ws_server_tls_key: None,
860 router_max_models: 4,
861 server_mode: crate::models::ServerMode::Normal,
862
863 max_tokens: None,
865 cache_type: CacheType::F16,
866 backend: {
867 use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
868 let vendors = detect_gpu_vendors();
869 let mut result = Backend::Cpu;
870 for v in &vendors {
871 if matches!(v, GpuVendor::Nvidia) {
872 result = Backend::Cuda;
873 break;
874 }
875 if matches!(v, GpuVendor::Amd) {
876 result = Backend::Rocm;
877 break;
878 }
879 if matches!(v, GpuVendor::Intel) {
880 result = Backend::Vulkan;
881 break;
882 }
883 }
884 result
885 },
886 platform: None,
887 llama_cpp_version_cpu: None,
888 llama_cpp_version_vulkan: None,
889 llama_cpp_version_rocm: None,
890 llama_cpp_version_rocm_lemonade: None,
891 llama_cpp_version_cuda: None,
892 api_endpoint_enabled: false,
893 api_endpoint_port: 49222,
894 spec_type: String::new(),
895 draft_tokens: 0,
896 tags: Vec::new(),
897 }
898 }
899}
900
901impl Default for Config {
902 fn default() -> Self {
903 Self {
904 models_dirs: vec![
905 dirs::data_dir()
906 .unwrap_or_default()
907 .join("llm-manager")
908 .join("models"),
909 ],
910 llama_server: "llama-server".into(),
911 default: DefaultParams::default(),
912 model_overrides: Default::default(),
913 profiles: Default::default(),
914 system_prompt_presets: Default::default(),
915 rpc_workers: Vec::new(),
916 search_limit: default_search_limit(),
917 active_panel: ActivePanel::Models,
918 left_pct: 55,
919 }
920 }
921}
922
923impl Config {
924 pub fn config_path() -> PathBuf {
925 config_base_dir()
926 .join("llm-manager")
927 .join("config.yaml")
928 }
929
930 pub fn validate(&self) -> Vec<String> {
932 let mut warnings = Vec::new();
933 let default = &self.default;
934
935 if default.context_length < 512 || default.context_length > 131072 {
937 warnings.push(format!(
938 "context_length {} is outside recommended range 512-131072",
939 default.context_length
940 ));
941 }
942 if default.temperature < 0.0 || default.temperature > 2.0 {
943 warnings.push(format!(
944 "temperature {} is outside recommended range 0.0-2.0",
945 default.temperature
946 ));
947 }
948 if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
949 warnings.push(format!(
950 "top_p {} is outside recommended range 0.0-1.0",
951 default.top_p
952 ));
953 }
954 if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
955 && default.repeat_penalty != 1.0
956 {
957 warnings.push(format!(
958 "repeat_penalty {} is outside recommended range 0.0-3.0",
959 default.repeat_penalty
960 ));
961 }
962 if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
963 warnings.push(format!(
964 "mirostat_lr {} is outside recommended range 0.0-1.0",
965 default.mirostat_lr
966 ));
967 }
968 if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
969 warnings.push(format!(
970 "mirostat_ent {} is outside recommended range 0.0-10.0",
971 default.mirostat_ent
972 ));
973 }
974
975 if default.timeout < 1 {
976 warnings.push(format!(
977 "timeout {} must be at least 1 second",
978 default.timeout
979 ));
980 }
981
982 if let Some(lora) = &default.lora
984 && !lora.exists() {
985 warnings.push(format!("lora path {} does not exist", lora.display()));
986 }
987 if let Some((lora, _)) = &default.lora_scaled
988 && !lora.exists() {
989 warnings.push(format!("lora path {} does not exist", lora.display()));
990 }
991
992 for model_name in self.model_overrides.keys() {
994 if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
995 if let Some(lora) = &override_settings.lora
996 && !lora.exists() {
997 warnings.push(format!(
998 "model '{}' lora path {} does not exist",
999 model_name,
1000 lora.display()
1001 ));
1002 }
1003 if let Some((lora, _)) = &override_settings.lora_scaled
1004 && !lora.exists() {
1005 warnings.push(format!(
1006 "model '{}' lora path {} does not exist",
1007 model_name,
1008 lora.display()
1009 ));
1010 }
1011 }
1012 }
1013
1014 warnings
1015 }
1016
1017 pub fn resolve_settings(
1019 &self,
1020 model_name: Option<&str>,
1021 profile_name: Option<&str>,
1022 ) -> crate::models::ModelSettings {
1023 let mut settings = crate::models::ModelSettings::from_config(self);
1024
1025 if let Some(name) = model_name
1027 && let Some(override_settings) = self.model_overrides.get(name)
1028 {
1029 override_settings.apply(&mut settings);
1030 }
1031
1032 if let Some(p_name) = profile_name {
1034 if let Some(profile) = self.profiles.get(p_name) {
1035 profile.settings.apply(&mut settings);
1036 } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1037 profile.settings.apply(&mut settings);
1038 }
1039 }
1040
1041 settings
1042 }
1043
1044 pub fn get_preset_content(&self, name: &str) -> Option<String> {
1046 self.system_prompt_presets
1047 .get(name)
1048 .map(|p| p.content.clone())
1049 }
1050
1051 fn normalize_config(mut config: Config) -> Config {
1052 for path in &mut config.models_dirs {
1054 let path_str = path.to_string_lossy();
1055 if let Some(stripped) = path_str.strip_prefix("~/") {
1056 let home = dirs::home_dir().unwrap_or_default();
1057 *path = home.join(stripped);
1058 } else if !path.is_absolute() {
1059 let home = dirs::home_dir().unwrap_or_default();
1060 *path = home.join(path_str.as_ref());
1061 }
1062 }
1063
1064 for p in builtin_profiles() {
1066 if config.profiles.get(&p.name).is_none() {
1067 config.profiles.insert_builtin(p);
1068 }
1069 }
1070
1071 for p in builtin_system_prompt_presets() {
1073 if config.system_prompt_presets.get(&p.name).is_none() {
1074 config.system_prompt_presets.insert_builtin(p);
1075 }
1076 }
1077 config
1078 }
1079
1080 fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1081 let content = std::fs::read_to_string(path)?;
1082 let config: Config = serde_yaml::from_str(&content)
1083 .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1084 let config = Self::normalize_config(config);
1085 let config = config.auto_detect_platform();
1086 let warnings = config.validate();
1087 if !warnings.is_empty() {
1088 eprintln!("Config validation warnings:");
1089 for warning in &warnings {
1090 eprintln!(" - {}", warning);
1091 }
1092 }
1093 Ok(config)
1094 }
1095
1096 pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1097 let path = Self::config_path();
1098 if path.exists() {
1099 Self::load_impl(&path)
1100 } else {
1101 let mut config = Config::default();
1102 config.save()?;
1103 Ok(config)
1104 }
1105 }
1106
1107 pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1108 if path.exists() {
1109 Self::load_impl(&path)
1110 } else {
1111 Err(format!("Config file not found: {}", path.display()).into())
1112 }
1113 }
1114
1115 fn auto_detect_platform(mut self) -> Self {
1117 if self.default.platform.is_none() {
1118 self.default.platform =
1119 Some(
1120 crate::backend::hardware::platform_name(
1121 crate::backend::hardware::detect_platform(),
1122 )
1123 .to_string(),
1124 );
1125 }
1126 self
1127 }
1128
1129 pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1130 let path = Self::config_path();
1131 if let Some(parent) = path.parent() {
1132 std::fs::create_dir_all(parent)?;
1133 }
1134 let content = serde_yaml::to_string(self)?;
1135 std::fs::write(&path, content)?;
1136 let entries: Vec<(String, ModelOverride)> = self
1138 .model_overrides
1139 .keys()
1140 .iter()
1141 .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1142 .collect();
1143 for (name, cfg) in entries {
1144 self.model_overrides.save(&name, &cfg);
1145 }
1146 for profile in self.profiles.user_profiles() {
1148 self.profiles.save(&profile);
1149 }
1150 for preset in self.system_prompt_presets.user_presets() {
1152 self.system_prompt_presets.save(&preset);
1153 }
1154 Ok(())
1155 }
1156
1157 pub fn merged_profiles(&self) -> Vec<Profile> {
1158 self.profiles.all()
1159 }
1160
1161 pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
1162 self.system_prompt_presets.all()
1163 }
1164}
1165
/// Severity attached to a `LogEntry`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LogLevel {
    /// Informational message.
    Info,
    /// Something unexpected that did not stop the operation.
    Warning,
    /// A failure.
    Error,
}
1172
1173impl LogLevel {
1174 pub fn label(&self) -> &'static str {
1175 match self {
1176 LogLevel::Info => "INFO",
1177 LogLevel::Warning => "WARNING",
1178 LogLevel::Error => "ERROR",
1179 }
1180 }
1181}
1182
1183#[derive(Debug, Clone)]
1184pub struct LogEntry {
1185 pub timestamp: String,
1186 pub level: LogLevel,
1187 pub message: String,
1188}
1189
1190impl LogEntry {
1191 pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1192 let timestamp = Local::now().format("%H:%M:%S").to_string();
1193 let message = sanitize_log(&message.into());
1194 Self {
1195 timestamp,
1196 level,
1197 message,
1198 }
1199 }
1200}
1201
/// Makes `input` safe to show as a single log message.
///
/// * Only the first 2000 characters are considered.
/// * Control characters are dropped, except `\n` (kept) and `\t` (replaced
///   by a space).
/// * Trailing whitespace is removed.
/// * When the input was longer than 2000 characters, `"... (truncated)"` is
///   appended to the result.
fn sanitize_log(input: &str) -> String {
    const MAX_CHARS: usize = 2000;

    let mut remaining = input.chars();
    let mut cleaned = String::with_capacity(input.len().min(MAX_CHARS));

    // `take` stops after MAX_CHARS items without pulling another one, so
    // whatever is left in `remaining` afterwards is exactly the cut-off part.
    for ch in remaining.by_ref().take(MAX_CHARS) {
        match ch {
            '\t' => cleaned.push(' '),
            '\n' => cleaned.push('\n'),
            other if other.is_control() => {}
            other => cleaned.push(other),
        }
    }
    let was_truncated = remaining.next().is_some();

    let kept_len = cleaned.trim_end().len();
    cleaned.truncate(kept_len);

    if was_truncated {
        cleaned.push_str("... (truncated)");
    }
    cleaned
}