1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12#[allow(unused_imports)]
13pub use model_config::{display_from_key, key_from_display, ModelConfigStore};
14
15pub use profiles::ProfileStore;
16
17use crate::models::{
18 Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
19};
20use crate::tui::app::ActivePanel;
21pub use presets::PresetStore;
22
/// System prompt text that `DefaultParams::default()` stores in `system_prompt`.
///
/// The text is identical to the content of the built-in "Coder" preset returned
/// by `builtin_system_prompt_presets()`.
pub const DEFAULT_SYSTEM_PROMPT: &str = "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.";
25
26pub fn config_base_dir() -> PathBuf {
31 if let Some(d) = dirs::config_dir() {
32 return d;
33 }
34 if let Some(home) = dirs::home_dir() {
35 return home.join(".config");
36 }
37 PathBuf::from(".").join(".llm-manager")
38}
39
/// Returns the number of physical CPU cores, never less than 1.
///
/// The count is derived from the distinct `(physical id, core id)` pairs listed
/// in `/proc/cpuinfo`. When that file cannot be read, or when it carries no such
/// pairs (the keys are absent on some architectures), the function falls back to
/// `std::thread::available_parallelism()` and finally to `1`.
pub fn physical_cores() -> u32 {
    let fallback = || {
        std::thread::available_parallelism()
            .map(|p| u32::try_from(p.get()).unwrap_or(u32::MAX))
            .unwrap_or(1)
    };

    match std::fs::read_to_string("/proc/cpuinfo") {
        Ok(content) => match count_physical_cores(&content) {
            // No usable topology information: do not report zero cores.
            0 => fallback(),
            n => n,
        },
        Err(_) => fallback(),
    }
}

/// Counts distinct `(physical id, core id)` pairs in `/proc/cpuinfo`-style text.
///
/// Processor stanzas are separated by blank lines; the pair is recorded once per
/// stanza so that a value left over from the previous stanza is never combined
/// with a value from the current one.
fn count_physical_cores(cpuinfo: &str) -> u32 {
    let mut seen: HashSet<(&str, &str)> = HashSet::new();
    let mut cur_phys: Option<&str> = None;
    let mut cur_core: Option<&str> = None;

    for line in cpuinfo.lines() {
        if line.trim().is_empty() {
            // End of a processor stanza: record it and reset the per-stanza state.
            if let (Some(phys), Some(core)) = (cur_phys.take(), cur_core.take()) {
                seen.insert((phys, core));
            }
            continue;
        }
        if let Some((key, val)) = line.split_once(':') {
            match key.trim() {
                "physical id" => cur_phys = Some(val.trim()),
                "core id" => cur_core = Some(val.trim()),
                _ => {}
            }
        }
    }

    // The last stanza is not necessarily followed by a blank line.
    if let (Some(phys), Some(core)) = (cur_phys, cur_core) {
        seen.insert((phys, core));
    }

    u32::try_from(seen.len()).unwrap_or(u32::MAX)
}
70
/// One RPC worker entry stored in `Config::rpc_workers`.
///
/// When deserializing, only `ip` is mandatory; the other fields fall back to
/// their serde defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RpcWorker {
    /// Whether this worker is selected; defaults to `false` when absent.
    #[serde(default)]
    pub selected: bool,
    /// Display name of the worker; defaults to an empty string when absent.
    #[serde(default)]
    pub name: String,
    /// Address of the worker (stored as text, not validated here).
    pub ip: String,
    /// Port of the worker; defaults to `default_rpc_port()` when absent.
    #[serde(default = "default_rpc_port")]
    pub port: u16,
}
82
/// Serde default for `RpcWorker::port`.
fn default_rpc_port() -> u16 {
    const DEFAULT_RPC_PORT: u16 = 50052;
    DEFAULT_RPC_PORT
}
86
/// Top-level application configuration, (de)serialized as YAML at
/// `Config::config_path()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Directories searched for models. `normalize_config` expands a leading
    /// `~/` and resolves relative entries against the home directory.
    pub models_dirs: Vec<PathBuf>,
    /// Path or command name of the llama server binary
    /// (`"llama-server"` in `Config::default()`).
    pub llama_server: PathBuf,
    /// Global default parameters.
    pub default: DefaultParams,
    /// Per-model overrides. Skipped by serde: not part of the YAML document;
    /// `Config::save` persists the entries through the store itself.
    #[serde(default, skip)]
    pub model_overrides: ModelConfigStore,
    /// Profile store. Skipped by serde; `Config::save` persists user profiles
    /// through the store itself.
    #[serde(default, skip)]
    pub profiles: ProfileStore,
    /// System prompt preset store. Skipped by serde; `Config::save` persists
    /// user presets through the store itself.
    #[serde(default, skip)]
    pub system_prompt_presets: PresetStore,
    /// Configured RPC workers; empty when absent.
    #[serde(default)]
    pub rpc_workers: Vec<RpcWorker>,
    /// Search result limit; defaults to `default_search_limit()` when absent.
    #[serde(default = "default_search_limit")]
    pub search_limit: u32,
    /// TUI panel that is active; defaults to `ActivePanel::default()` when absent.
    #[serde(default)]
    pub active_panel: crate::tui::app::ActivePanel,
    /// Percentage value for the left side of the TUI layout (presumably its
    /// width — confirm against the TUI code); defaults to `default_left_pct()`.
    #[serde(default = "default_left_pct")]
    pub left_pct: u16,
    /// UI language code; defaults to `default_language()` when absent.
    #[serde(default = "default_language")]
    pub language: String,
}
118
/// Serde default for `Config::language`.
fn default_language() -> String {
    String::from("en")
}

/// Serde default for `Config::left_pct`.
fn default_left_pct() -> u16 {
    const DEFAULT_LEFT_PCT: u16 = 55;
    DEFAULT_LEFT_PCT
}

/// Serde default for `Config::search_limit`.
fn default_search_limit() -> u32 {
    const DEFAULT_SEARCH_LIMIT: u32 = 50;
    DEFAULT_SEARCH_LIMIT
}
130
/// A named bundle of setting overrides that can be layered onto model settings.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Profile {
    /// Name used to look the profile up (see `Config::resolve_settings`).
    pub name: String,
    /// Human-readable description of the profile.
    pub description: String,
    /// Overrides applied by this profile; an empty `ModelOverride` when absent.
    #[serde(default)]
    pub settings: ModelOverride,
}
141
142impl Profile {
143 pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
145 self.settings.apply(&mut base);
146 base
147 }
148}
149
/// A named, reusable system prompt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemPromptPreset {
    /// Name used to look the preset up (see `Config::get_preset_content`).
    pub name: String,
    /// Short human-readable description of the preset.
    pub description: String,
    /// The system prompt text itself.
    pub content: String,
}
157
158pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
160 vec![
161 SystemPromptPreset {
162 name: "General".into(),
163 description: "General-purpose assistant".into(),
164 content: "You are a helpful assistant.".into(),
165 },
166 SystemPromptPreset {
167 name: "Coder".into(),
168 description: "Expert software developer".into(),
169 content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
170 },
171 SystemPromptPreset {
172 name: "Thinker".into(),
173 description: "Analytical and thoughtful".into(),
174 content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
175 },
176 SystemPromptPreset {
177 name: "Mathematician".into(),
178 description: "Expert in mathematics".into(),
179 content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
180 },
181 ]
182}
183
/// A sparse set of overrides for `crate::models::ModelSettings`.
///
/// Every field is optional. `ModelOverride::apply` copies most `Some` values
/// onto the target and leaves the target untouched for `None`. The exceptions
/// are `cache_type_k`, `cache_type_v`, `presence_penalty`, `frequency_penalty`
/// and `max_tokens`, which `apply` assigns unconditionally (a `None` here
/// clears the target value).
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct ModelOverride {
    // --- Context, batching, memory, threading ---
    pub context_length: Option<u32>,
    pub batch_size: Option<u32>,
    pub ubatch_size: Option<u32>,
    pub cache_type_k: Option<CacheTypeK>,
    pub cache_type_v: Option<CacheTypeV>,
    pub keep: Option<i32>,
    pub swa_full: Option<bool>,
    pub mlock: Option<bool>,
    pub mmap: Option<bool>,
    pub numa: Option<NumMode>,
    pub uniform_cache: Option<bool>,
    pub system_prompt: Option<String>,
    pub system_prompt_preset_name: Option<String>,
    pub max_concurrent_predictions: Option<u32>,
    pub threads: Option<u32>,
    pub threads_batch: Option<u32>,
    pub parallel: Option<u32>,

    // --- GPU, adapters, templates ---
    /// Numeric mirror of `gpu_layers_mode`: `from_settings` writes `0` for
    /// `Auto`, `n` for `Specific(n)` and `-1` for `All`.
    pub gpu_layers: Option<i32>,
    pub split_mode: Option<SplitMode>,
    pub tensor_split: Option<String>,
    pub main_gpu: Option<i32>,
    pub fit: Option<bool>,
    pub lora: Option<PathBuf>,
    /// LoRA path paired with an `f32` (presumably its scale — confirm).
    pub lora_scaled: Option<(PathBuf, f32)>,
    pub rpc: Option<String>,
    pub embedding: Option<bool>,
    pub kv_cache_offload: Option<bool>,
    pub flash_attn: Option<bool>,
    pub jinja: Option<bool>,
    pub chat_template: Option<String>,
    pub chat_template_kwargs: Option<String>,
    pub expert_count: Option<i32>,
    pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,

    // --- Sampling ---
    pub seed: Option<i32>,
    pub temperature: Option<f32>,
    pub top_k: Option<i32>,
    pub top_p: Option<f32>,
    pub min_p: Option<f32>,
    pub typical_p: Option<f32>,
    pub mirostat: Option<Mirostat>,
    pub mirostat_lr: Option<f32>,
    pub mirostat_ent: Option<f32>,
    pub ignore_eos: Option<bool>,
    pub samplers: Option<Samplers>,

    // --- Repetition penalties ---
    pub repeat_penalty: Option<f32>,
    pub repeat_last_n: Option<i32>,
    pub presence_penalty: Option<f32>,
    pub frequency_penalty: Option<f32>,
    pub dry_multiplier: Option<f32>,
    pub dry_base: Option<f32>,
    pub dry_allowed_length: Option<i32>,
    pub dry_penalty_last_n: Option<i32>,

    // --- RoPE ---
    pub rope_scaling: Option<RopeScaling>,
    pub rope_scale: Option<f32>,
    pub rope_freq_base: Option<f32>,
    pub rope_freq_scale: Option<f32>,
    pub rope_yarn_enabled: Option<bool>,

    // --- Server behaviour ---
    pub cache_prompt: Option<bool>,
    pub cache_reuse: Option<u32>,
    pub webui: Option<bool>,

    // --- Miscellaneous ---
    pub max_tokens: Option<u32>,
    pub cache_type: Option<CacheType>,
    pub llama_cpp_version_cpu: Option<String>,
    pub llama_cpp_version_vulkan: Option<String>,
    pub llama_cpp_version_rocm: Option<String>,
    pub llama_cpp_version_rocm_lemonade: Option<String>,
    pub llama_cpp_version_cuda: Option<String>,
    pub spec_type: Option<String>,
    pub draft_tokens: Option<u32>,
    pub tags: Option<Vec<String>>,
}
270
/// For each listed field, copies `$self.$field` into `$base.$field` when the
/// override is `Some`; the base value is kept otherwise.
///
/// Intended for `Copy` field types: `$self.$field` is read by value.
macro_rules! apply_scalar {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field {
                $base.$field = value;
            }
        )+
    };
}
279
/// For each listed field, clones the override value into `$base.$field` when
/// `$self.$field` is `Some`; the base value is kept otherwise.
///
/// Used for non-`Copy` fields whose base type is the plain (non-`Option`) value.
macro_rules! apply_clone {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field.as_ref() {
                $base.$field = value.clone();
            }
        )+
    };
}
290
/// For each listed field, replaces `$base.$field` with a clone of the override
/// when `$self.$field` is `Some`; a `None` override leaves the base untouched.
///
/// Used for fields that are `Option<T>` on both sides.
macro_rules! apply_option {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if $self.$field.is_some() {
                $base.$field = $self.$field.clone();
            }
        )+
    };
}
301
302impl ModelOverride {
303 pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
304 Self {
305 context_length: Some(s.context_length),
306 batch_size: Some(s.batch_size),
307 ubatch_size: Some(s.ubatch_size),
308 cache_type_k: s.cache_type_k,
309 cache_type_v: s.cache_type_v,
310 keep: Some(s.keep),
311 swa_full: Some(s.swa_full),
312 mlock: Some(s.mlock),
313 mmap: Some(s.mmap),
314 numa: Some(s.numa),
315 uniform_cache: Some(s.uniform_cache),
316 system_prompt: Some(s.system_prompt.clone()),
317 system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
318 max_concurrent_predictions: s.max_concurrent_predictions,
319 threads: Some(s.threads),
320 threads_batch: Some(s.threads_batch),
321 parallel: Some(s.parallel),
322 gpu_layers: Some(match s.gpu_layers_mode {
323 crate::models::GpuLayersMode::Auto => 0,
324 crate::models::GpuLayersMode::Specific(n) => n as i32,
325 crate::models::GpuLayersMode::All => -1,
326 }),
327 gpu_layers_mode: Some(s.gpu_layers_mode),
328 split_mode: Some(s.split_mode),
329 tensor_split: Some(s.tensor_split.clone()),
330 main_gpu: Some(s.main_gpu),
331 fit: Some(s.fit),
332 lora: s.lora.clone(),
333 lora_scaled: s.lora_scaled.clone(),
334 rpc: Some(s.rpc.clone()),
335 embedding: Some(s.embedding),
336 kv_cache_offload: Some(s.kv_cache_offload),
337 flash_attn: Some(s.flash_attn),
338 jinja: Some(s.jinja),
339 chat_template: s.chat_template.clone(),
340 chat_template_kwargs: s.chat_template_kwargs.clone(),
341 expert_count: Some(s.expert_count),
342 seed: Some(s.seed),
343 temperature: Some(s.temperature),
344 top_k: Some(s.top_k),
345 top_p: Some(s.top_p),
346 min_p: Some(s.min_p),
347 typical_p: Some(s.typical_p),
348 mirostat: Some(s.mirostat),
349 mirostat_lr: Some(s.mirostat_lr),
350 mirostat_ent: Some(s.mirostat_ent),
351 ignore_eos: Some(s.ignore_eos),
352 samplers: Some(s.samplers.clone()),
353 repeat_penalty: Some(s.repeat_penalty),
354 repeat_last_n: Some(s.repeat_last_n),
355 presence_penalty: s.presence_penalty,
356 frequency_penalty: s.frequency_penalty,
357 dry_multiplier: Some(s.dry_multiplier),
358 dry_base: Some(s.dry_base),
359 dry_allowed_length: Some(s.dry_allowed_length),
360 dry_penalty_last_n: Some(s.dry_penalty_last_n),
361 rope_scaling: Some(s.rope_scaling),
362 rope_scale: Some(s.rope_scale),
363 rope_freq_base: Some(s.rope_freq_base),
364 rope_freq_scale: Some(s.rope_freq_scale),
365 rope_yarn_enabled: Some(s.rope_yarn_enabled),
366 cache_prompt: Some(s.cache_prompt),
367 cache_reuse: Some(s.cache_reuse),
368 webui: Some(s.webui),
369 max_tokens: s.max_tokens,
370 cache_type: Some(s.cache_type),
371 llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
372 llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
373 llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
374 llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
375 llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
376 spec_type: Some(s.spec_type.clone()),
377 draft_tokens: Some(s.draft_tokens),
378 tags: Some(s.tags.clone()),
379 }
380 }
381
382 pub fn apply(&self, base: &mut crate::models::ModelSettings) {
384 apply_scalar!(
389 self,
390 base,
391 context_length,
392 batch_size,
393 ubatch_size,
394 keep,
395 swa_full,
396 mlock,
397 mmap,
398 numa,
399 uniform_cache,
400 kv_cache_offload,
401 threads,
402 threads_batch,
403 parallel,
404 split_mode,
405 main_gpu,
406 fit,
407 embedding,
408 flash_attn,
409 jinja,
410 expert_count,
411 seed,
412 temperature,
413 top_k,
414 top_p,
415 min_p,
416 typical_p,
417 mirostat,
418 mirostat_lr,
419 mirostat_ent,
420 ignore_eos,
421 repeat_penalty,
422 repeat_last_n,
423 dry_multiplier,
424 dry_base,
425 dry_allowed_length,
426 dry_penalty_last_n,
427 rope_scaling,
428 rope_scale,
429 rope_freq_base,
430 rope_freq_scale,
431 rope_yarn_enabled,
432 cache_prompt,
433 cache_reuse,
434 webui,
435 cache_type,
436 draft_tokens,
437 gpu_layers_mode,
438 );
439
440 apply_clone!(
442 self,
443 base,
444 system_prompt,
445 system_prompt_preset_name,
446 tensor_split,
447 rpc,
448 samplers,
449 spec_type,
450 tags,
451 );
452
453 apply_option!(
455 self,
456 base,
457 lora,
458 lora_scaled,
459 chat_template,
460 chat_template_kwargs,
461 llama_cpp_version_cpu,
462 llama_cpp_version_vulkan,
463 llama_cpp_version_rocm,
464 llama_cpp_version_rocm_lemonade,
465 llama_cpp_version_cuda,
466 );
467
468 base.cache_type_k = self.cache_type_k;
470 base.cache_type_v = self.cache_type_v;
471 base.presence_penalty = self.presence_penalty;
472 base.frequency_penalty = self.frequency_penalty;
473 base.max_tokens = self.max_tokens;
474
475 base.max_concurrent_predictions = self
477 .max_concurrent_predictions
478 .or(base.max_concurrent_predictions);
479
480 if let Some(n) = self.gpu_layers {
483 base.gpu_layers_mode = match n {
484 n if n < 0 => crate::models::GpuLayersMode::All,
485 n => crate::models::GpuLayersMode::Specific(n as u32),
486 };
487 }
488
489 }
505}
506
507pub fn builtin_profiles() -> Vec<Profile> {
509 vec![
510 Profile {
511 name: "Qwen".into(),
512 description: "Optimized for Qwen models (dense)".into(),
513 settings: ModelOverride {
514 context_length: Some(131072),
515 temperature: Some(0.7),
516 top_k: Some(20),
517 top_p: Some(0.95),
518 max_tokens: Some(4096),
519 presence_penalty: Some(0.0),
520 uniform_cache: Some(true),
521 jinja: Some(true),
522 ..Default::default()
523 },
524 },
525 Profile {
526 name: "Qwen-MoE".into(),
527 description: "Optimized for Qwen MoE models (35B-A3B)".into(),
528 settings: ModelOverride {
529 context_length: Some(131072),
530 temperature: Some(0.8),
531 top_k: Some(20),
532 top_p: Some(0.95),
533 max_tokens: Some(4096),
534 presence_penalty: Some(1.5),
535 uniform_cache: Some(true),
536 jinja: Some(true),
537 ..Default::default()
538 },
539 },
540 Profile {
541 name: "Qwen-Coding".into(),
542 description: "Optimized for Qwen models in coding mode".into(),
543 settings: ModelOverride {
544 context_length: Some(131072),
545 temperature: Some(0.6),
546 top_k: Some(20),
547 top_p: Some(0.95),
548 max_tokens: Some(4096),
549 presence_penalty: Some(0.0),
550 uniform_cache: Some(true),
551 jinja: Some(true),
552 ..Default::default()
553 },
554 },
555 Profile {
556 name: "Gemma".into(),
557 description: "Optimized for Gemma 2/4 models".into(),
558 settings: ModelOverride {
559 context_length: Some(131072),
560 min_p: Some(0.1),
561 temperature: Some(1.0),
562 top_k: Some(65),
563 top_p: Some(0.95),
564 max_tokens: Some(4096),
565 uniform_cache: Some(true),
566 jinja: Some(true),
567 ..Default::default()
568 },
569 },
570 Profile {
571 name: "Llama".into(),
572 description: "Optimized for Llama 3.1/3.3 models".into(),
573 settings: ModelOverride {
574 context_length: Some(131072),
575 temperature: Some(0.7),
576 top_p: Some(0.9),
577 repeat_penalty: Some(1.1),
578 max_tokens: Some(4096),
579 uniform_cache: Some(true),
580 jinja: Some(true),
581 ..Default::default()
582 },
583 },
584 Profile {
585 name: "Mistral".into(),
586 description: "Optimized for Mistral 7B/NeMo models".into(),
587 settings: ModelOverride {
588 context_length: Some(131072),
589 temperature: Some(0.7),
590 top_k: Some(50),
591 top_p: Some(0.9),
592 max_tokens: Some(4096),
593 uniform_cache: Some(true),
594 jinja: Some(true),
595 ..Default::default()
596 },
597 },
598 Profile {
599 name: "Phi".into(),
600 description: "Optimized for Phi 3.5 Mini models".into(),
601 settings: ModelOverride {
602 context_length: Some(131072),
603 temperature: Some(0.7),
604 top_k: Some(50),
605 top_p: Some(0.9),
606 repeat_penalty: Some(1.1),
607 max_tokens: Some(4096),
608 uniform_cache: Some(true),
609 ..Default::default()
610 },
611 },
612 ]
613}
614
615#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
616#[serde(default)]
617pub struct DefaultParams {
618 #[serde(default)]
620 pub context_length: u32,
621 #[serde(default)]
622 pub threads: u32,
623 #[serde(default)]
624 pub threads_batch: u32,
625 #[serde(default)]
626 pub batch_size: u32,
627 #[serde(default)]
628 pub ubatch_size: u32,
629 #[serde(default = "default_cache_type_k")]
630 pub cache_type_k: Option<CacheTypeK>,
631 #[serde(default = "default_cache_type_v")]
632 pub cache_type_v: Option<CacheTypeV>,
633 #[serde(default)]
634 pub keep: i32,
635 #[serde(default)]
636 pub swa_full: bool,
637 #[serde(default)]
638 pub mlock: bool,
639 #[serde(default)]
640 pub mmap: bool,
641 #[serde(default)]
642 pub numa: NumMode,
643 #[serde(default)]
644 pub uniform_cache: bool,
645 #[serde(default)]
646 pub kv_cache_offload: bool,
647 #[serde(default)]
648 pub parallel: u32,
649 #[serde(default)]
650 pub max_concurrent_predictions: Option<u32>,
651 #[serde(default)]
652 pub system_prompt: String,
653 #[serde(default = "default_system_prompt_preset_name")]
654 pub system_prompt_preset_name: String,
655 #[serde(default)]
657 pub gpu_layers: i32,
658 #[serde(default = "default_gpu_layers_mode")]
659 pub gpu_layers_mode: crate::models::GpuLayersMode,
660 #[serde(default)]
661 pub split_mode: SplitMode,
662 #[serde(default)]
663 pub tensor_split: String,
664 #[serde(default)]
665 pub main_gpu: i32,
666 #[serde(default)]
667 pub fit: bool,
668 #[serde(default)]
669 pub lora: Option<PathBuf>,
670 #[serde(default)]
671 pub lora_scaled: Option<(PathBuf, f32)>,
672 #[serde(default)]
673 pub rpc: String,
674 #[serde(default)]
675 pub embedding: bool,
676 #[serde(default)]
677 pub flash_attn: bool,
678 #[serde(default)]
679 pub jinja: bool,
680 #[serde(default)]
681 pub chat_template: Option<String>,
682 #[serde(default)]
683 pub chat_template_kwargs: Option<String>,
684 #[serde(default)]
685 pub expert_count: i32,
686
687 #[serde(default)]
689 pub seed: i32,
690 #[serde(default)]
691 pub temperature: f32,
692 #[serde(default)]
693 pub top_k: i32,
694 #[serde(default)]
695 pub top_p: f32,
696 #[serde(default)]
697 pub min_p: f32,
698 #[serde(default)]
699 pub typical_p: f32,
700 #[serde(default)]
701 pub mirostat: Mirostat,
702 #[serde(default)]
703 pub mirostat_lr: f32,
704 #[serde(default)]
705 pub mirostat_ent: f32,
706 #[serde(default)]
707 pub ignore_eos: bool,
708 #[serde(default)]
709 pub samplers: Samplers,
710
711 #[serde(default)]
713 pub repeat_penalty: f32,
714 #[serde(default)]
715 pub repeat_last_n: i32,
716 #[serde(default = "default_presence_penalty")]
717 pub presence_penalty: Option<f32>,
718 #[serde(default = "default_frequency_penalty")]
719 pub frequency_penalty: Option<f32>,
720 #[serde(default)]
721 pub dry_multiplier: f32,
722 #[serde(default)]
723 pub dry_base: f32,
724 #[serde(default)]
725 pub dry_allowed_length: i32,
726 #[serde(default)]
727 pub dry_penalty_last_n: i32,
728
729 #[serde(default)]
731 pub rope_scaling: RopeScaling,
732 #[serde(default)]
733 pub rope_scale: f32,
734 #[serde(default)]
735 pub rope_freq_base: f32,
736 #[serde(default)]
737 pub rope_freq_scale: f32,
738 #[serde(default)]
739 pub rope_yarn_enabled: bool,
740
741 #[serde(default)]
743 pub host: String,
744 #[serde(default)]
745 pub port: u16,
746 #[serde(default)]
747 pub timeout: u32,
748 #[serde(default = "default_cache_prompt")]
749 pub cache_prompt: bool,
750 #[serde(default)]
751 pub cache_reuse: u32,
752 #[serde(default)]
753 pub webui: bool,
754 #[serde(default)]
755 pub ws_server_enabled: bool,
756 #[serde(default = "default_ws_server_port")]
757 pub ws_server_port: u16,
758 #[serde(default)]
759 pub ws_server_auth_key: Option<String>,
760 #[serde(default = "default_ws_server_tls_enabled")]
761 pub ws_server_tls_enabled: bool,
762 #[serde(default)]
763 pub ws_server_tls_cert: Option<String>,
764 #[serde(default)]
765 pub ws_server_tls_key: Option<String>,
766 #[serde(default)]
767 pub router_max_models: u32,
768 #[serde(default)]
769 pub server_mode: crate::models::ServerMode,
770
771 #[serde(default = "default_max_tokens")]
773 pub max_tokens: Option<u32>,
774 #[serde(default)]
775 pub cache_type: CacheType,
776 #[serde(default)]
777 pub backend: Backend,
778 #[serde(default)]
780 pub platform: Option<String>,
781 #[serde(default)]
782 pub llama_cpp_version_cpu: Option<String>,
783 #[serde(default)]
784 pub llama_cpp_version_vulkan: Option<String>,
785 #[serde(default)]
786 pub llama_cpp_version_rocm: Option<String>,
787 #[serde(default)]
788 pub llama_cpp_version_rocm_lemonade: Option<String>,
789 #[serde(default)]
790 pub llama_cpp_version_cuda: Option<String>,
791
792 #[serde(default)]
794 pub api_endpoint_enabled: bool,
795 #[serde(default = "default_api_endpoint_port")]
796 pub api_endpoint_port: u16,
797 #[serde(default)]
798 pub spec_type: String,
799 #[serde(default)]
800 pub draft_tokens: u32,
801 #[serde(default)]
802 pub tags: Vec<String>,
803}
804
/// Serde default for `DefaultParams::api_endpoint_port`.
fn default_api_endpoint_port() -> u16 {
    const DEFAULT_API_ENDPOINT_PORT: u16 = 49222;
    DEFAULT_API_ENDPOINT_PORT
}

/// Serde default for `DefaultParams::system_prompt_preset_name`.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
812
/// Serde default for `DefaultParams::cache_type_k`: no value.
fn default_cache_type_k() -> Option<CacheTypeK> {
    None
}
/// Serde default for `DefaultParams::cache_type_v`: no value.
fn default_cache_type_v() -> Option<CacheTypeV> {
    None
}
/// Serde default for `DefaultParams::presence_penalty`: no value.
fn default_presence_penalty() -> Option<f32> {
    None
}
/// Serde default for `DefaultParams::frequency_penalty`: no value.
fn default_frequency_penalty() -> Option<f32> {
    None
}
/// Serde default for `DefaultParams::max_tokens`: no value.
fn default_max_tokens() -> Option<u32> {
    None
}
/// Serde default for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    true
}
/// Serde default for `DefaultParams::ws_server_port`.
fn default_ws_server_port() -> u16 {
    49223
}

/// Serde default for `DefaultParams::ws_server_tls_enabled`: enabled.
fn default_ws_server_tls_enabled() -> bool {
    true
}

/// Serde default for `DefaultParams::gpu_layers_mode`: `GpuLayersMode::Auto`.
fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
    crate::models::GpuLayersMode::Auto
}
842
843impl Default for DefaultParams {
844 fn default() -> Self {
845 Self {
846 context_length: 131072,
848 threads: physical_cores(),
849 threads_batch: 8,
850 batch_size: 512,
851 ubatch_size: 512,
852 cache_type_k: None,
853 cache_type_v: None,
854 keep: 0,
855 swa_full: false,
856 mlock: false,
857 mmap: true,
858 numa: NumMode::None,
859 uniform_cache: true,
860 kv_cache_offload: true,
861 parallel: 1,
862 max_concurrent_predictions: None,
863 system_prompt: DEFAULT_SYSTEM_PROMPT.to_string(),
864 system_prompt_preset_name: "Coder".to_string(),
865
866 gpu_layers: -1,
868 gpu_layers_mode: crate::models::GpuLayersMode::Auto,
869 split_mode: SplitMode::Layer,
870 tensor_split: String::new(),
871 main_gpu: 0,
872 fit: true,
873 lora: None,
874 lora_scaled: None,
875 rpc: String::new(),
876 embedding: false,
877 flash_attn: true,
878 jinja: true,
879 chat_template: None,
880 chat_template_kwargs: None,
881 expert_count: -1,
882
883 seed: -1,
885 temperature: 0.8,
886 top_k: 40,
887 top_p: 0.95,
888 min_p: 0.0,
889 typical_p: 1.0,
890 mirostat: Mirostat::Off,
891 mirostat_lr: 0.1,
892 mirostat_ent: 5.0,
893 ignore_eos: false,
894 samplers: Samplers::default(),
895
896 repeat_penalty: 1.1,
898 repeat_last_n: 64,
899 presence_penalty: None,
900 frequency_penalty: None,
901 dry_multiplier: 0.0,
902 dry_base: 1.75,
903 dry_allowed_length: 2,
904 dry_penalty_last_n: -1,
905
906 rope_scaling: RopeScaling::None,
908 rope_scale: 1.0,
909 rope_freq_base: 0.0,
910 rope_freq_scale: 1.0,
911 rope_yarn_enabled: false,
912
913 host: "127.0.0.1".to_string(),
915 port: 8080,
916 timeout: 600,
917 cache_prompt: true,
918 cache_reuse: 0,
919 webui: false,
920 ws_server_enabled: false,
921 ws_server_port: 49223,
922 ws_server_auth_key: None,
923 ws_server_tls_enabled: true,
924 ws_server_tls_cert: None,
925 ws_server_tls_key: None,
926 router_max_models: 4,
927 server_mode: crate::models::ServerMode::Normal,
928
929 max_tokens: None,
931 cache_type: CacheType::F16,
932 backend: {
933 use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
934 let vendors = detect_gpu_vendors();
935 let mut result = Backend::Cpu;
936 for v in &vendors {
937 if matches!(v, GpuVendor::Nvidia) {
938 result = Backend::Cuda;
939 break;
940 }
941 if matches!(v, GpuVendor::Amd) {
942 result = Backend::Rocm;
943 break;
944 }
945 if matches!(v, GpuVendor::Intel) {
946 result = Backend::Vulkan;
947 break;
948 }
949 }
950 result
951 },
952 platform: None,
953 llama_cpp_version_cpu: None,
954 llama_cpp_version_vulkan: None,
955 llama_cpp_version_rocm: None,
956 llama_cpp_version_rocm_lemonade: None,
957 llama_cpp_version_cuda: None,
958 api_endpoint_enabled: false,
959 api_endpoint_port: 49222,
960 spec_type: String::new(),
961 draft_tokens: 0,
962 tags: Vec::new(),
963 }
964 }
965}
966
967impl Default for Config {
968 fn default() -> Self {
969 Self {
970 models_dirs: vec![
971 dirs::data_dir()
972 .unwrap_or_default()
973 .join("llm-manager")
974 .join("models"),
975 ],
976 llama_server: "llama-server".into(),
977 default: DefaultParams::default(),
978 model_overrides: ModelConfigStore::new(vec![]),
979 profiles: Default::default(),
980 system_prompt_presets: Default::default(),
981 rpc_workers: Vec::new(),
982 search_limit: default_search_limit(),
983 active_panel: ActivePanel::Models,
984 left_pct: 55,
985 language: default_language(),
986 }
987 }
988}
989
990impl Config {
991 pub fn config_path() -> PathBuf {
992 config_base_dir().join("llm-manager").join("config.yaml")
993 }
994
995 pub fn validate(&self) -> Vec<String> {
997 let mut warnings = Vec::new();
998 let default = &self.default;
999
1000 if default.context_length < 512 || default.context_length > 131072 {
1002 warnings.push(format!(
1003 "context_length {} is outside recommended range 512-131072",
1004 default.context_length
1005 ));
1006 }
1007 if default.temperature < 0.0 || default.temperature > 2.0 {
1008 warnings.push(format!(
1009 "temperature {} is outside recommended range 0.0-2.0",
1010 default.temperature
1011 ));
1012 }
1013 if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
1014 warnings.push(format!(
1015 "top_p {} is outside recommended range 0.0-1.0",
1016 default.top_p
1017 ));
1018 }
1019 if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
1020 && default.repeat_penalty != 1.0
1021 {
1022 warnings.push(format!(
1023 "repeat_penalty {} is outside recommended range 0.0-3.0",
1024 default.repeat_penalty
1025 ));
1026 }
1027 if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
1028 warnings.push(format!(
1029 "mirostat_lr {} is outside recommended range 0.0-1.0",
1030 default.mirostat_lr
1031 ));
1032 }
1033 if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
1034 warnings.push(format!(
1035 "mirostat_ent {} is outside recommended range 0.0-10.0",
1036 default.mirostat_ent
1037 ));
1038 }
1039
1040 if default.timeout < 1 {
1041 warnings.push(format!(
1042 "timeout {} must be at least 1 second",
1043 default.timeout
1044 ));
1045 }
1046
1047 if let Some(lora) = &default.lora
1049 && !lora.exists()
1050 {
1051 warnings.push(format!("lora path {} does not exist", lora.display()));
1052 }
1053 if let Some((lora, _)) = &default.lora_scaled
1054 && !lora.exists()
1055 {
1056 warnings.push(format!("lora path {} does not exist", lora.display()));
1057 }
1058
1059 for model_name in self.model_overrides.keys() {
1061 if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
1062 if let Some(lora) = &override_settings.lora
1063 && !lora.exists()
1064 {
1065 warnings.push(format!(
1066 "model '{}' lora path {} does not exist",
1067 model_name,
1068 lora.display()
1069 ));
1070 }
1071 if let Some((lora, _)) = &override_settings.lora_scaled
1072 && !lora.exists()
1073 {
1074 warnings.push(format!(
1075 "model '{}' lora path {} does not exist",
1076 model_name,
1077 lora.display()
1078 ));
1079 }
1080 }
1081 }
1082
1083 warnings
1084 }
1085
1086 pub fn resolve_settings(
1088 &self,
1089 model_name: Option<&str>,
1090 profile_name: Option<&str>,
1091 ) -> crate::models::ModelSettings {
1092 let mut settings = crate::models::ModelSettings::from_config(self);
1093
1094 if let Some(name) = model_name
1096 && let Some(override_settings) = self.model_overrides.get(name)
1097 {
1098 override_settings.apply(&mut settings);
1099 }
1100
1101 if let Some(p_name) = profile_name {
1103 if let Some(profile) = self.profiles.get(p_name) {
1104 profile.settings.apply(&mut settings);
1105 } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1106 profile.settings.apply(&mut settings);
1107 }
1108 }
1109
1110 settings
1111 }
1112
1113 pub fn get_preset_content(&self, name: &str) -> Option<String> {
1115 self.system_prompt_presets
1116 .get(name)
1117 .map(|p| p.content.clone())
1118 }
1119
1120 fn normalize_config(mut config: Config) -> Config {
1121 for path in &mut config.models_dirs {
1123 let path_str = path.to_string_lossy();
1124 if let Some(stripped) = path_str.strip_prefix("~/") {
1125 let home = dirs::home_dir().unwrap_or_default();
1126 *path = home.join(stripped);
1127 } else if !path.is_absolute() {
1128 let home = dirs::home_dir().unwrap_or_default();
1129 *path = home.join(path_str.as_ref());
1130 }
1131 }
1132
1133 for p in builtin_profiles() {
1135 if config.profiles.get(&p.name).is_none() {
1136 config.profiles.insert_builtin(p);
1137 }
1138 }
1139
1140 for p in builtin_system_prompt_presets() {
1142 if config.system_prompt_presets.get(&p.name).is_none() {
1143 config.system_prompt_presets.insert_builtin(p);
1144 }
1145 }
1146 config
1147 }
1148
1149 fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1150 let content = std::fs::read_to_string(path)?;
1151 let config: Config = serde_yaml::from_str(&content)
1152 .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1153 let config = Self::normalize_config(config);
1154 let config = config.auto_detect_platform();
1155 let warnings = config.validate();
1156 if !warnings.is_empty() {
1157 eprintln!("Config validation warnings:");
1158 for warning in &warnings {
1159 eprintln!(" - {}", warning);
1160 }
1161 }
1162 Ok(config)
1163 }
1164
1165 pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1166 let path = Self::config_path();
1167 if path.exists() {
1168 Self::load_impl(&path)
1169 } else {
1170 let mut config = Config::default();
1171 config.save()?;
1172 Ok(config)
1173 }
1174 }
1175
1176 pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1177 if path.exists() {
1178 Self::load_impl(&path)
1179 } else {
1180 Err(format!("Config file not found: {}", path.display()).into())
1181 }
1182 }
1183
1184 fn auto_detect_platform(mut self) -> Self {
1186 if self.default.platform.is_none() {
1187 self.default.platform =
1188 Some(
1189 crate::backend::hardware::platform_name(
1190 crate::backend::hardware::detect_platform(),
1191 )
1192 .to_string(),
1193 );
1194 }
1195 self
1196 }
1197
1198 pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1199 let path = Self::config_path();
1200 if let Some(parent) = path.parent() {
1201 std::fs::create_dir_all(parent)?;
1202 }
1203 let content = serde_yaml::to_string(self)?;
1204 std::fs::write(&path, content)?;
1205 let entries: Vec<(String, ModelOverride)> = self
1207 .model_overrides
1208 .keys()
1209 .iter()
1210 .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1211 .collect();
1212 for (name, cfg) in entries {
1213 self.model_overrides.save(&name, &cfg);
1214 }
1215 for profile in self.profiles.user_profiles() {
1217 self.profiles.save(&profile);
1218 }
1219 for preset in self.system_prompt_presets.user_presets() {
1221 self.system_prompt_presets.save(&preset);
1222 }
1223 Ok(())
1224 }
1225
    /// Returns every profile held by the profile store (delegates to
    /// `ProfileStore::all`).
    pub fn merged_profiles(&self) -> Vec<Profile> {
        self.profiles.all()
    }

    /// Returns every system prompt preset held by the preset store (delegates
    /// to `PresetStore::all`).
    pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
        self.system_prompt_presets.all()
    }
1233}
1234
/// Severity of a log entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    Info,
    Warning,
    Error,
}

impl LogLevel {
    /// Returns the upper-case label for this level:
    /// `"INFO"`, `"WARNING"` or `"ERROR"`.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Info => "INFO",
            Self::Warning => "WARNING",
            Self::Error => "ERROR",
        }
    }
}
1251
1252#[derive(Debug, Clone)]
1253pub struct LogEntry {
1254 pub timestamp: String,
1255 pub level: LogLevel,
1256 pub message: String,
1257}
1258
1259impl LogEntry {
1260 pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1261 let timestamp = Local::now().format("%H:%M:%S").to_string();
1262 let message = sanitize_log(&message.into());
1263 Self {
1264 timestamp,
1265 level,
1266 message,
1267 }
1268 }
1269}
1270
/// Makes a log message safe to display.
///
/// * Only the first 2000 characters (Unicode scalar values) are considered.
/// * Control characters are dropped, except `'\n'`, which is kept, and `'\t'`,
///   which is replaced by a single space.
/// * Trailing whitespace is trimmed.
/// * When the input was longer than the limit, `"... (truncated)"` is appended.
///
/// The input is processed in one bounded pass: at most 2001 characters are
/// decoded, regardless of how long the message is.
fn sanitize_log(input: &str) -> String {
    const MAX_CHARS: usize = 2000;

    let mut chars = input.chars();
    let mut output = String::with_capacity(input.len().min(MAX_CHARS));

    for c in chars.by_ref().take(MAX_CHARS) {
        match c {
            '\t' => output.push(' '),
            '\n' => output.push('\n'),
            c if c.is_control() => {}
            c => output.push(c),
        }
    }
    // Anything left after the first MAX_CHARS characters means we cut the input.
    let truncated = chars.next().is_some();

    let kept = output.trim_end().len();
    output.truncate(kept);

    if truncated {
        output.push_str("... (truncated)");
    }
    output
}