1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12pub use model_config::ModelConfigStore;
13
14pub use profiles::ProfileStore;
15
16use crate::models::{
17 Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
18};
19use crate::tui::app::ActivePanel;
20pub use presets::PresetStore;
21
/// System prompt that `DefaultParams::default()` installs when nothing else is configured.
///
/// The text is the same as the content of the built-in "Coder" system prompt preset.
pub const DEFAULT_SYSTEM_PROMPT: &str =
    "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.";
25
26pub fn config_base_dir() -> PathBuf {
31 if let Some(d) = dirs::config_dir() {
32 return d;
33 }
34 if let Some(home) = dirs::home_dir() {
35 return home.join(".config");
36 }
37 PathBuf::from(".").join(".llm-manager")
38}
39
/// Returns the number of physical CPU cores, never less than 1.
///
/// The count is taken from the distinct `(physical id, core id)` pairs listed in
/// `/proc/cpuinfo`. When that file cannot be read, or when it contains no such
/// pairs, the function falls back to `std::thread::available_parallelism()` and
/// finally to `1`, so callers never receive a thread count of zero.
pub fn physical_cores() -> u32 {
    let logical_cpus = || {
        std::thread::available_parallelism()
            .map(|p| p.get() as u32)
            .unwrap_or(1)
    };

    let Ok(content) = std::fs::read_to_string("/proc/cpuinfo") else {
        return logical_cpus();
    };

    match count_physical_cores(&content) {
        // No usable topology information in the file: use the logical CPU count.
        0 => logical_cpus(),
        n => n as u32,
    }
}

/// Counts the distinct `(physical id, core id)` pairs in `/proc/cpuinfo`-formatted text.
fn count_physical_cores(cpuinfo: &str) -> usize {
    let mut seen = HashSet::new();
    let mut cur_phys: Option<&str> = None;
    let mut cur_core: Option<&str> = None;

    for line in cpuinfo.lines() {
        // A blank line ends one processor stanza. Forget its ids so they cannot be
        // paired with ids that belong to the next stanza.
        if line.trim().is_empty() {
            cur_phys = None;
            cur_core = None;
            continue;
        }
        let Some((key, val)) = line.split_once(':') else {
            continue;
        };
        match key.trim() {
            "physical id" => cur_phys = Some(val.trim()),
            "core id" => cur_core = Some(val.trim()),
            _ => continue,
        }
        if let (Some(phys), Some(core)) = (cur_phys, cur_core) {
            seen.insert((phys, core));
        }
    }

    seen.len()
}
70
/// One RPC worker entry of `Config::rpc_workers`.
///
/// Only `ip` is required when deserializing; every other field has a default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RpcWorker {
    /// Selection flag; `false` when the key is absent.
    #[serde(default)]
    pub selected: bool,
    /// Name of the worker; empty when the key is absent.
    #[serde(default)]
    pub name: String,
    /// Address of the worker, stored as a plain string (no validation happens here).
    pub ip: String,
    /// Port of the worker; `default_rpc_port()` (50052) when the key is absent.
    #[serde(default = "default_rpc_port")]
    pub port: u16,
}
82
/// Serde default for `RpcWorker::port`.
fn default_rpc_port() -> u16 {
    const RPC_PORT: u16 = 50052;
    RPC_PORT
}
86
/// Top-level application configuration as stored in the main config file.
///
/// The three store fields are marked `#[serde(default, skip)]`: they are neither
/// written to nor read from that file and start out as their `Default` value after
/// deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Model directories; `Config::default()` uses a single
    /// `<data dir>/llm-manager/models` entry.
    pub models_dirs: Vec<PathBuf>,
    /// Path or command name of the server binary (`llama-server` by default).
    pub llama_server: PathBuf,
    /// Global default parameters.
    pub default: DefaultParams,
    /// Per-model overrides, looked up by model name. Not part of the serialized config.
    #[serde(default, skip)]
    pub model_overrides: ModelConfigStore,
    /// Profile store. Not part of the serialized config.
    #[serde(default, skip)]
    pub profiles: ProfileStore,
    /// System prompt preset store. Not part of the serialized config.
    #[serde(default, skip)]
    pub system_prompt_presets: PresetStore,
    /// Configured RPC workers; empty when the key is absent.
    #[serde(default)]
    pub rpc_workers: Vec<RpcWorker>,
    /// Search limit; `default_search_limit()` (50) when the key is absent.
    #[serde(default = "default_search_limit")]
    pub search_limit: u32,
    /// Persisted TUI panel; the panel type's `Default` when the key is absent.
    #[serde(default)]
    pub active_panel: crate::tui::app::ActivePanel,
    /// Persisted `left_pct` value; `default_left_pct()` (55) when the key is absent.
    /// NOTE(review): presumably the width of the left pane in percent — confirm in the TUI code.
    #[serde(default = "default_left_pct")]
    pub left_pct: u16,
}
115
/// Serde default for `Config::left_pct`.
fn default_left_pct() -> u16 {
    const LEFT_PCT: u16 = 55;
    LEFT_PCT
}
119
/// Serde default for `Config::search_limit`; also used by `Config::default()`.
fn default_search_limit() -> u32 {
    const SEARCH_LIMIT: u32 = 50;
    SEARCH_LIMIT
}
123
/// A named bundle of setting overrides.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Profile {
    /// Name the profile is looked up by (see `Config::resolve_settings`).
    pub name: String,
    /// Free-text description.
    pub description: String,
    /// Values this profile overrides; an all-`None` override when the key is absent.
    #[serde(default)]
    pub settings: ModelOverride,
}
134
135impl Profile {
136 pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
138 self.settings.apply(&mut base);
139 base
140 }
141}
142
/// A named system prompt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemPromptPreset {
    /// Name the preset is looked up by (see `Config::get_preset_content`).
    pub name: String,
    /// Short description of the preset.
    pub description: String,
    /// The system prompt text itself.
    pub content: String,
}
150
151pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
153 vec![
154 SystemPromptPreset {
155 name: "General".into(),
156 description: "General-purpose assistant".into(),
157 content: "You are a helpful assistant.".into(),
158 },
159 SystemPromptPreset {
160 name: "Coder".into(),
161 description: "Expert software developer".into(),
162 content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
163 },
164 SystemPromptPreset {
165 name: "Thinker".into(),
166 description: "Analytical and thoughtful".into(),
167 content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
168 },
169 SystemPromptPreset {
170 name: "Mathematician".into(),
171 description: "Expert in mathematics".into(),
172 content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
173 },
174 ]
175}
176
/// A sparse set of model settings: every field is an `Option`, and the derived
/// `Default` leaves all of them `None`.
///
/// `ModelOverride::apply` layers these values onto a `ModelSettings`. For most
/// fields `None` leaves the base value untouched, but `cache_type_k`,
/// `cache_type_v`, `presence_penalty`, `frequency_penalty` and `max_tokens` are
/// copied verbatim, so `None` in one of those clears the base value.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct ModelOverride {
    // Context, batching, memory and threading.
    pub context_length: Option<u32>,
    pub batch_size: Option<u32>,
    pub ubatch_size: Option<u32>,
    /// Copied verbatim by `apply` (`None` clears the base value).
    pub cache_type_k: Option<CacheTypeK>,
    /// Copied verbatim by `apply` (`None` clears the base value).
    pub cache_type_v: Option<CacheTypeV>,
    pub keep: Option<i32>,
    pub swa_full: Option<bool>,
    pub mlock: Option<bool>,
    pub mmap: Option<bool>,
    pub numa: Option<NumMode>,
    pub uniform_cache: Option<bool>,
    pub system_prompt: Option<String>,
    pub system_prompt_preset_name: Option<String>,
    pub max_concurrent_predictions: Option<u32>,
    pub threads: Option<u32>,
    pub threads_batch: Option<u32>,
    pub parallel: Option<u32>,

    // GPU placement, adapters and chat template.
    /// Numeric layer count; `apply` maps negative values to `GpuLayersMode::All`
    /// and all other values to `GpuLayersMode::Specific`.
    pub gpu_layers: Option<i32>,
    pub split_mode: Option<SplitMode>,
    pub tensor_split: Option<String>,
    pub main_gpu: Option<i32>,
    pub fit: Option<bool>,
    pub lora: Option<PathBuf>,
    /// LoRA path paired with an `f32` value.
    pub lora_scaled: Option<(PathBuf, f32)>,
    pub rpc: Option<String>,
    pub embedding: Option<bool>,
    pub kv_cache_offload: Option<bool>,
    pub flash_attn: Option<bool>,
    pub jinja: Option<bool>,
    pub chat_template: Option<String>,
    pub chat_template_kwargs: Option<String>,
    pub expert_count: Option<i32>,
    pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,

    // Sampling.
    pub seed: Option<i32>,
    pub temperature: Option<f32>,
    pub top_k: Option<i32>,
    pub top_p: Option<f32>,
    pub min_p: Option<f32>,
    pub typical_p: Option<f32>,
    pub mirostat: Option<Mirostat>,
    pub mirostat_lr: Option<f32>,
    pub mirostat_ent: Option<f32>,
    pub ignore_eos: Option<bool>,
    pub samplers: Option<Samplers>,

    // Repetition penalties.
    pub repeat_penalty: Option<f32>,
    pub repeat_last_n: Option<i32>,
    /// Copied verbatim by `apply` (`None` clears the base value).
    pub presence_penalty: Option<f32>,
    /// Copied verbatim by `apply` (`None` clears the base value).
    pub frequency_penalty: Option<f32>,
    pub dry_multiplier: Option<f32>,
    pub dry_base: Option<f32>,
    pub dry_allowed_length: Option<i32>,
    pub dry_penalty_last_n: Option<i32>,

    // RoPE.
    pub rope_scaling: Option<RopeScaling>,
    pub rope_scale: Option<f32>,
    pub rope_freq_base: Option<f32>,
    pub rope_freq_scale: Option<f32>,
    pub rope_yarn_enabled: Option<bool>,

    // Prompt cache and web UI.
    pub cache_prompt: Option<bool>,
    pub cache_reuse: Option<u32>,
    pub webui: Option<bool>,

    // Remaining settings.
    /// Copied verbatim by `apply` (`None` clears the base value).
    pub max_tokens: Option<u32>,
    pub cache_type: Option<CacheType>,
    pub llama_cpp_version_cpu: Option<String>,
    pub llama_cpp_version_vulkan: Option<String>,
    pub llama_cpp_version_rocm: Option<String>,
    pub llama_cpp_version_rocm_lemonade: Option<String>,
    pub llama_cpp_version_cuda: Option<String>,
    pub spec_type: Option<String>,
    pub draft_tokens: Option<u32>,
    pub tags: Option<Vec<String>>,
}
263
/// For each listed field, copies the override's value into `$base` when it is
/// `Some`; otherwise `$base` keeps its current value.
///
/// Intended for `Copy` fields: the option is read by value out of `$self`.
macro_rules! apply_scalar {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field {
                $base.$field = value;
            }
        )+
    };
}
272
/// For each listed field, clones the override's value into `$base` when it is
/// `Some`; otherwise `$base` keeps its current value.
///
/// Used for non-`Copy` fields whose base type is the plain value (not an `Option`).
macro_rules! apply_clone {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field.as_ref() {
                $base.$field.clone_from(value);
            }
        )+
    };
}
283
/// For each listed field, replaces `$base`'s optional value with a clone of the
/// override's value when the override is `Some`; a `None` override leaves
/// `$base` untouched (it never clears an existing value).
macro_rules! apply_option {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field.clone() {
                $base.$field = Some(value);
            }
        )+
    };
}
294
295impl ModelOverride {
296 pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
297 Self {
298 context_length: Some(s.context_length),
299 batch_size: Some(s.batch_size),
300 ubatch_size: Some(s.ubatch_size),
301 cache_type_k: s.cache_type_k,
302 cache_type_v: s.cache_type_v,
303 keep: Some(s.keep),
304 swa_full: Some(s.swa_full),
305 mlock: Some(s.mlock),
306 mmap: Some(s.mmap),
307 numa: Some(s.numa),
308 uniform_cache: Some(s.uniform_cache),
309 system_prompt: Some(s.system_prompt.clone()),
310 system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
311 max_concurrent_predictions: s.max_concurrent_predictions,
312 threads: Some(s.threads),
313 threads_batch: Some(s.threads_batch),
314 parallel: Some(s.parallel),
315 gpu_layers: Some(match s.gpu_layers_mode {
316 crate::models::GpuLayersMode::Auto => 0,
317 crate::models::GpuLayersMode::Specific(n) => n as i32,
318 crate::models::GpuLayersMode::All => -1,
319 }),
320 gpu_layers_mode: Some(s.gpu_layers_mode),
321 split_mode: Some(s.split_mode),
322 tensor_split: Some(s.tensor_split.clone()),
323 main_gpu: Some(s.main_gpu),
324 fit: Some(s.fit),
325 lora: s.lora.clone(),
326 lora_scaled: s.lora_scaled.clone(),
327 rpc: Some(s.rpc.clone()),
328 embedding: Some(s.embedding),
329 kv_cache_offload: Some(s.kv_cache_offload),
330 flash_attn: Some(s.flash_attn),
331 jinja: Some(s.jinja),
332 chat_template: s.chat_template.clone(),
333 chat_template_kwargs: s.chat_template_kwargs.clone(),
334 expert_count: Some(s.expert_count),
335 seed: Some(s.seed),
336 temperature: Some(s.temperature),
337 top_k: Some(s.top_k),
338 top_p: Some(s.top_p),
339 min_p: Some(s.min_p),
340 typical_p: Some(s.typical_p),
341 mirostat: Some(s.mirostat),
342 mirostat_lr: Some(s.mirostat_lr),
343 mirostat_ent: Some(s.mirostat_ent),
344 ignore_eos: Some(s.ignore_eos),
345 samplers: Some(s.samplers.clone()),
346 repeat_penalty: Some(s.repeat_penalty),
347 repeat_last_n: Some(s.repeat_last_n),
348 presence_penalty: s.presence_penalty,
349 frequency_penalty: s.frequency_penalty,
350 dry_multiplier: Some(s.dry_multiplier),
351 dry_base: Some(s.dry_base),
352 dry_allowed_length: Some(s.dry_allowed_length),
353 dry_penalty_last_n: Some(s.dry_penalty_last_n),
354 rope_scaling: Some(s.rope_scaling),
355 rope_scale: Some(s.rope_scale),
356 rope_freq_base: Some(s.rope_freq_base),
357 rope_freq_scale: Some(s.rope_freq_scale),
358 rope_yarn_enabled: Some(s.rope_yarn_enabled),
359 cache_prompt: Some(s.cache_prompt),
360 cache_reuse: Some(s.cache_reuse),
361 webui: Some(s.webui),
362 max_tokens: s.max_tokens,
363 cache_type: Some(s.cache_type),
364 llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
365 llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
366 llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
367 llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
368 llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
369 spec_type: Some(s.spec_type.clone()),
370 draft_tokens: Some(s.draft_tokens),
371 tags: Some(s.tags.clone()),
372 }
373 }
374
375 pub fn apply(&self, base: &mut crate::models::ModelSettings) {
377 apply_scalar!(self, base,
382 context_length, batch_size, ubatch_size, keep, swa_full, mlock, mmap,
383 numa, uniform_cache, kv_cache_offload, threads, threads_batch, parallel,
384 split_mode, main_gpu, fit, embedding, flash_attn, jinja, expert_count,
385 seed, temperature, top_k, top_p, min_p, typical_p,
386 mirostat, mirostat_lr, mirostat_ent, ignore_eos,
387 repeat_penalty, repeat_last_n,
388 dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
389 rope_scaling, rope_scale, rope_freq_base, rope_freq_scale, rope_yarn_enabled,
390 cache_prompt, cache_reuse, webui, cache_type,
391 draft_tokens, gpu_layers_mode,
392 );
393
394 apply_clone!(self, base,
396 system_prompt, system_prompt_preset_name, tensor_split, rpc,
397 samplers, spec_type, tags,
398 );
399
400 apply_option!(self, base,
402 lora, lora_scaled, chat_template, chat_template_kwargs,
403 llama_cpp_version_cpu, llama_cpp_version_vulkan,
404 llama_cpp_version_rocm, llama_cpp_version_rocm_lemonade,
405 llama_cpp_version_cuda,
406 );
407
408 base.cache_type_k = self.cache_type_k;
410 base.cache_type_v = self.cache_type_v;
411 base.presence_penalty = self.presence_penalty;
412 base.frequency_penalty = self.frequency_penalty;
413 base.max_tokens = self.max_tokens;
414
415 base.max_concurrent_predictions = self
417 .max_concurrent_predictions
418 .or(base.max_concurrent_predictions);
419
420 if let Some(n) = self.gpu_layers {
423 base.gpu_layers_mode = match n {
424 n if n < 0 => crate::models::GpuLayersMode::All,
425 n => crate::models::GpuLayersMode::Specific(n as u32),
426 };
427 }
428
429 }
445}
446
447pub fn builtin_profiles() -> Vec<Profile> {
449 vec![
450 Profile {
451 name: "Qwen".into(),
452 description: "Optimized for Qwen models (dense)".into(),
453 settings: ModelOverride {
454 context_length: Some(131072),
455 temperature: Some(0.7),
456 top_k: Some(20),
457 top_p: Some(0.95),
458 max_tokens: Some(4096),
459 presence_penalty: Some(0.0),
460 uniform_cache: Some(true),
461 jinja: Some(true),
462 ..Default::default()
463 },
464 },
465 Profile {
466 name: "Qwen-MoE".into(),
467 description: "Optimized for Qwen MoE models (35B-A3B)".into(),
468 settings: ModelOverride {
469 context_length: Some(131072),
470 temperature: Some(0.8),
471 top_k: Some(20),
472 top_p: Some(0.95),
473 max_tokens: Some(4096),
474 presence_penalty: Some(1.5),
475 uniform_cache: Some(true),
476 jinja: Some(true),
477 ..Default::default()
478 },
479 },
480 Profile {
481 name: "Qwen-Coding".into(),
482 description: "Optimized for Qwen models in coding mode".into(),
483 settings: ModelOverride {
484 context_length: Some(131072),
485 temperature: Some(0.6),
486 top_k: Some(20),
487 top_p: Some(0.95),
488 max_tokens: Some(4096),
489 presence_penalty: Some(0.0),
490 uniform_cache: Some(true),
491 jinja: Some(true),
492 ..Default::default()
493 },
494 },
495 Profile {
496 name: "Gemma".into(),
497 description: "Optimized for Gemma 2/4 models".into(),
498 settings: ModelOverride {
499 context_length: Some(131072),
500 min_p: Some(0.1),
501 temperature: Some(1.0),
502 top_k: Some(65),
503 top_p: Some(0.95),
504 max_tokens: Some(4096),
505 uniform_cache: Some(true),
506 jinja: Some(true),
507 ..Default::default()
508 },
509 },
510 Profile {
511 name: "Llama".into(),
512 description: "Optimized for Llama 3.1/3.3 models".into(),
513 settings: ModelOverride {
514 context_length: Some(131072),
515 temperature: Some(0.7),
516 top_p: Some(0.9),
517 repeat_penalty: Some(1.1),
518 max_tokens: Some(4096),
519 uniform_cache: Some(true),
520 jinja: Some(true),
521 ..Default::default()
522 },
523 },
524 Profile {
525 name: "Mistral".into(),
526 description: "Optimized for Mistral 7B/NeMo models".into(),
527 settings: ModelOverride {
528 context_length: Some(131072),
529 temperature: Some(0.7),
530 top_k: Some(50),
531 top_p: Some(0.9),
532 max_tokens: Some(4096),
533 uniform_cache: Some(true),
534 jinja: Some(true),
535 ..Default::default()
536 },
537 },
538 Profile {
539 name: "Phi".into(),
540 description: "Optimized for Phi 3.5 Mini models".into(),
541 settings: ModelOverride {
542 context_length: Some(131072),
543 temperature: Some(0.7),
544 top_k: Some(50),
545 top_p: Some(0.9),
546 repeat_penalty: Some(1.1),
547 max_tokens: Some(4096),
548 uniform_cache: Some(true),
549 ..Default::default()
550 },
551 },
552 ]
553}
554
555#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
556#[serde(default)]
557pub struct DefaultParams {
558 #[serde(default)]
560 pub context_length: u32,
561 #[serde(default)]
562 pub threads: u32,
563 #[serde(default)]
564 pub threads_batch: u32,
565 #[serde(default)]
566 pub batch_size: u32,
567 #[serde(default)]
568 pub ubatch_size: u32,
569 #[serde(default = "default_cache_type_k")]
570 pub cache_type_k: Option<CacheTypeK>,
571 #[serde(default = "default_cache_type_v")]
572 pub cache_type_v: Option<CacheTypeV>,
573 #[serde(default)]
574 pub keep: i32,
575 #[serde(default)]
576 pub swa_full: bool,
577 #[serde(default)]
578 pub mlock: bool,
579 #[serde(default)]
580 pub mmap: bool,
581 #[serde(default)]
582 pub numa: NumMode,
583 #[serde(default)]
584 pub uniform_cache: bool,
585 #[serde(default)]
586 pub kv_cache_offload: bool,
587 #[serde(default)]
588 pub parallel: u32,
589 #[serde(default)]
590 pub max_concurrent_predictions: Option<u32>,
591 #[serde(default)]
592 pub system_prompt: String,
593 #[serde(default = "default_system_prompt_preset_name")]
594 pub system_prompt_preset_name: String,
595 #[serde(default)]
597 pub gpu_layers: i32,
598 #[serde(default = "default_gpu_layers_mode")]
599 pub gpu_layers_mode: crate::models::GpuLayersMode,
600 #[serde(default)]
601 pub split_mode: SplitMode,
602 #[serde(default)]
603 pub tensor_split: String,
604 #[serde(default)]
605 pub main_gpu: i32,
606 #[serde(default)]
607 pub fit: bool,
608 #[serde(default)]
609 pub lora: Option<PathBuf>,
610 #[serde(default)]
611 pub lora_scaled: Option<(PathBuf, f32)>,
612 #[serde(default)]
613 pub rpc: String,
614 #[serde(default)]
615 pub embedding: bool,
616 #[serde(default)]
617 pub flash_attn: bool,
618 #[serde(default)]
619 pub jinja: bool,
620 #[serde(default)]
621 pub chat_template: Option<String>,
622 #[serde(default)]
623 pub chat_template_kwargs: Option<String>,
624 #[serde(default)]
625 pub expert_count: i32,
626
627 #[serde(default)]
629 pub seed: i32,
630 #[serde(default)]
631 pub temperature: f32,
632 #[serde(default)]
633 pub top_k: i32,
634 #[serde(default)]
635 pub top_p: f32,
636 #[serde(default)]
637 pub min_p: f32,
638 #[serde(default)]
639 pub typical_p: f32,
640 #[serde(default)]
641 pub mirostat: Mirostat,
642 #[serde(default)]
643 pub mirostat_lr: f32,
644 #[serde(default)]
645 pub mirostat_ent: f32,
646 #[serde(default)]
647 pub ignore_eos: bool,
648 #[serde(default)]
649 pub samplers: Samplers,
650
651 #[serde(default)]
653 pub repeat_penalty: f32,
654 #[serde(default)]
655 pub repeat_last_n: i32,
656 #[serde(default = "default_presence_penalty")]
657 pub presence_penalty: Option<f32>,
658 #[serde(default = "default_frequency_penalty")]
659 pub frequency_penalty: Option<f32>,
660 #[serde(default)]
661 pub dry_multiplier: f32,
662 #[serde(default)]
663 pub dry_base: f32,
664 #[serde(default)]
665 pub dry_allowed_length: i32,
666 #[serde(default)]
667 pub dry_penalty_last_n: i32,
668
669 #[serde(default)]
671 pub rope_scaling: RopeScaling,
672 #[serde(default)]
673 pub rope_scale: f32,
674 #[serde(default)]
675 pub rope_freq_base: f32,
676 #[serde(default)]
677 pub rope_freq_scale: f32,
678 #[serde(default)]
679 pub rope_yarn_enabled: bool,
680
681 #[serde(default)]
683 pub host: String,
684 #[serde(default)]
685 pub port: u16,
686 #[serde(default)]
687 pub timeout: u32,
688 #[serde(default = "default_cache_prompt")]
689 pub cache_prompt: bool,
690 #[serde(default)]
691 pub cache_reuse: u32,
692 #[serde(default)]
693 pub webui: bool,
694 #[serde(default)]
695 pub ws_server_enabled: bool,
696 #[serde(default = "default_ws_server_port")]
697 pub ws_server_port: u16,
698 #[serde(default)]
699 pub ws_server_auth_key: Option<String>,
700 #[serde(default = "default_ws_server_tls_enabled")]
701 pub ws_server_tls_enabled: bool,
702 #[serde(default)]
703 pub ws_server_tls_cert: Option<String>,
704 #[serde(default)]
705 pub ws_server_tls_key: Option<String>,
706 #[serde(default)]
707 pub router_max_models: u32,
708 #[serde(default)]
709 pub server_mode: crate::models::ServerMode,
710
711 #[serde(default = "default_max_tokens")]
713 pub max_tokens: Option<u32>,
714 #[serde(default)]
715 pub cache_type: CacheType,
716 #[serde(default)]
717 pub backend: Backend,
718 #[serde(default)]
720 pub platform: Option<String>,
721 #[serde(default)]
722 pub llama_cpp_version_cpu: Option<String>,
723 #[serde(default)]
724 pub llama_cpp_version_vulkan: Option<String>,
725 #[serde(default)]
726 pub llama_cpp_version_rocm: Option<String>,
727 #[serde(default)]
728 pub llama_cpp_version_rocm_lemonade: Option<String>,
729 #[serde(default)]
730 pub llama_cpp_version_cuda: Option<String>,
731
732 #[serde(default)]
734 pub api_endpoint_enabled: bool,
735 #[serde(default = "default_api_endpoint_port")]
736 pub api_endpoint_port: u16,
737 #[serde(default)]
738 pub spec_type: String,
739 #[serde(default)]
740 pub draft_tokens: u32,
741 #[serde(default)]
742 pub tags: Vec<String>,
743}
744
/// Serde default for `DefaultParams::api_endpoint_port`.
fn default_api_endpoint_port() -> u16 {
    const API_ENDPOINT_PORT: u16 = 49222;
    API_ENDPOINT_PORT
}
748
/// Serde default for `DefaultParams::system_prompt_preset_name`.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
752
753fn default_cache_type_k() -> Option<CacheTypeK> {
754 None
755}
756fn default_cache_type_v() -> Option<CacheTypeV> {
757 None
758}
/// Serde default for `DefaultParams::presence_penalty`: unset.
fn default_presence_penalty() -> Option<f32> {
    Option::<f32>::None
}
/// Serde default for `DefaultParams::frequency_penalty`: unset.
fn default_frequency_penalty() -> Option<f32> {
    Option::<f32>::None
}
/// Serde default for `DefaultParams::max_tokens`: unset.
fn default_max_tokens() -> Option<u32> {
    Option::<u32>::None
}
/// Serde default for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    const CACHE_PROMPT: bool = true;
    CACHE_PROMPT
}
/// Serde default for `DefaultParams::ws_server_port`.
fn default_ws_server_port() -> u16 {
    const WS_SERVER_PORT: u16 = 49223;
    WS_SERVER_PORT
}
774
/// Serde default for `DefaultParams::ws_server_tls_enabled`: enabled.
fn default_ws_server_tls_enabled() -> bool {
    const TLS_ENABLED: bool = true;
    TLS_ENABLED
}
778
779fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
780 crate::models::GpuLayersMode::Auto
781}
782
783impl Default for DefaultParams {
784 fn default() -> Self {
785 Self {
786 context_length: 131072,
788 threads: physical_cores(),
789 threads_batch: 8,
790 batch_size: 512,
791 ubatch_size: 512,
792 cache_type_k: None,
793 cache_type_v: None,
794 keep: 0,
795 swa_full: false,
796 mlock: false,
797 mmap: true,
798 numa: NumMode::None,
799 uniform_cache: true,
800 kv_cache_offload: true,
801 parallel: 1,
802 max_concurrent_predictions: None,
803 system_prompt: DEFAULT_SYSTEM_PROMPT.to_string(),
804 system_prompt_preset_name: "Coder".to_string(),
805
806 gpu_layers: -1,
808 gpu_layers_mode: crate::models::GpuLayersMode::Auto,
809 split_mode: SplitMode::Layer,
810 tensor_split: String::new(),
811 main_gpu: 0,
812 fit: true,
813 lora: None,
814 lora_scaled: None,
815 rpc: String::new(),
816 embedding: false,
817 flash_attn: true,
818 jinja: true,
819 chat_template: None,
820 chat_template_kwargs: None,
821 expert_count: -1,
822
823 seed: -1,
825 temperature: 0.8,
826 top_k: 40,
827 top_p: 0.95,
828 min_p: 0.0,
829 typical_p: 1.0,
830 mirostat: Mirostat::Off,
831 mirostat_lr: 0.1,
832 mirostat_ent: 5.0,
833 ignore_eos: false,
834 samplers: Samplers::default(),
835
836 repeat_penalty: 1.1,
838 repeat_last_n: 64,
839 presence_penalty: None,
840 frequency_penalty: None,
841 dry_multiplier: 0.0,
842 dry_base: 1.75,
843 dry_allowed_length: 2,
844 dry_penalty_last_n: -1,
845
846 rope_scaling: RopeScaling::None,
848 rope_scale: 1.0,
849 rope_freq_base: 0.0,
850 rope_freq_scale: 1.0,
851 rope_yarn_enabled: false,
852
853 host: "127.0.0.1".to_string(),
855 port: 8080,
856 timeout: 600,
857 cache_prompt: true,
858 cache_reuse: 0,
859 webui: false,
860 ws_server_enabled: false,
861 ws_server_port: 49223,
862 ws_server_auth_key: None,
863 ws_server_tls_enabled: true,
864 ws_server_tls_cert: None,
865 ws_server_tls_key: None,
866 router_max_models: 4,
867 server_mode: crate::models::ServerMode::Normal,
868
869 max_tokens: None,
871 cache_type: CacheType::F16,
872 backend: {
873 use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
874 let vendors = detect_gpu_vendors();
875 let mut result = Backend::Cpu;
876 for v in &vendors {
877 if matches!(v, GpuVendor::Nvidia) {
878 result = Backend::Cuda;
879 break;
880 }
881 if matches!(v, GpuVendor::Amd) {
882 result = Backend::Rocm;
883 break;
884 }
885 if matches!(v, GpuVendor::Intel) {
886 result = Backend::Vulkan;
887 break;
888 }
889 }
890 result
891 },
892 platform: None,
893 llama_cpp_version_cpu: None,
894 llama_cpp_version_vulkan: None,
895 llama_cpp_version_rocm: None,
896 llama_cpp_version_rocm_lemonade: None,
897 llama_cpp_version_cuda: None,
898 api_endpoint_enabled: false,
899 api_endpoint_port: 49222,
900 spec_type: String::new(),
901 draft_tokens: 0,
902 tags: Vec::new(),
903 }
904 }
905}
906
907impl Default for Config {
908 fn default() -> Self {
909 Self {
910 models_dirs: vec![
911 dirs::data_dir()
912 .unwrap_or_default()
913 .join("llm-manager")
914 .join("models"),
915 ],
916 llama_server: "llama-server".into(),
917 default: DefaultParams::default(),
918 model_overrides: Default::default(),
919 profiles: Default::default(),
920 system_prompt_presets: Default::default(),
921 rpc_workers: Vec::new(),
922 search_limit: default_search_limit(),
923 active_panel: ActivePanel::Models,
924 left_pct: 55,
925 }
926 }
927}
928
929impl Config {
930 pub fn config_path() -> PathBuf {
931 config_base_dir()
932 .join("llm-manager")
933 .join("config.yaml")
934 }
935
936 pub fn validate(&self) -> Vec<String> {
938 let mut warnings = Vec::new();
939 let default = &self.default;
940
941 if default.context_length < 512 || default.context_length > 131072 {
943 warnings.push(format!(
944 "context_length {} is outside recommended range 512-131072",
945 default.context_length
946 ));
947 }
948 if default.temperature < 0.0 || default.temperature > 2.0 {
949 warnings.push(format!(
950 "temperature {} is outside recommended range 0.0-2.0",
951 default.temperature
952 ));
953 }
954 if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
955 warnings.push(format!(
956 "top_p {} is outside recommended range 0.0-1.0",
957 default.top_p
958 ));
959 }
960 if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
961 && default.repeat_penalty != 1.0
962 {
963 warnings.push(format!(
964 "repeat_penalty {} is outside recommended range 0.0-3.0",
965 default.repeat_penalty
966 ));
967 }
968 if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
969 warnings.push(format!(
970 "mirostat_lr {} is outside recommended range 0.0-1.0",
971 default.mirostat_lr
972 ));
973 }
974 if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
975 warnings.push(format!(
976 "mirostat_ent {} is outside recommended range 0.0-10.0",
977 default.mirostat_ent
978 ));
979 }
980
981 if default.timeout < 1 {
982 warnings.push(format!(
983 "timeout {} must be at least 1 second",
984 default.timeout
985 ));
986 }
987
988 if let Some(lora) = &default.lora
990 && !lora.exists() {
991 warnings.push(format!("lora path {} does not exist", lora.display()));
992 }
993 if let Some((lora, _)) = &default.lora_scaled
994 && !lora.exists() {
995 warnings.push(format!("lora path {} does not exist", lora.display()));
996 }
997
998 for model_name in self.model_overrides.keys() {
1000 if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
1001 if let Some(lora) = &override_settings.lora
1002 && !lora.exists() {
1003 warnings.push(format!(
1004 "model '{}' lora path {} does not exist",
1005 model_name,
1006 lora.display()
1007 ));
1008 }
1009 if let Some((lora, _)) = &override_settings.lora_scaled
1010 && !lora.exists() {
1011 warnings.push(format!(
1012 "model '{}' lora path {} does not exist",
1013 model_name,
1014 lora.display()
1015 ));
1016 }
1017 }
1018 }
1019
1020 warnings
1021 }
1022
1023 pub fn resolve_settings(
1025 &self,
1026 model_name: Option<&str>,
1027 profile_name: Option<&str>,
1028 ) -> crate::models::ModelSettings {
1029 let mut settings = crate::models::ModelSettings::from_config(self);
1030
1031 if let Some(name) = model_name
1033 && let Some(override_settings) = self.model_overrides.get(name)
1034 {
1035 override_settings.apply(&mut settings);
1036 }
1037
1038 if let Some(p_name) = profile_name {
1040 if let Some(profile) = self.profiles.get(p_name) {
1041 profile.settings.apply(&mut settings);
1042 } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1043 profile.settings.apply(&mut settings);
1044 }
1045 }
1046
1047 settings
1048 }
1049
1050 pub fn get_preset_content(&self, name: &str) -> Option<String> {
1052 self.system_prompt_presets
1053 .get(name)
1054 .map(|p| p.content.clone())
1055 }
1056
1057 fn normalize_config(mut config: Config) -> Config {
1058 for path in &mut config.models_dirs {
1060 let path_str = path.to_string_lossy();
1061 if let Some(stripped) = path_str.strip_prefix("~/") {
1062 let home = dirs::home_dir().unwrap_or_default();
1063 *path = home.join(stripped);
1064 } else if !path.is_absolute() {
1065 let home = dirs::home_dir().unwrap_or_default();
1066 *path = home.join(path_str.as_ref());
1067 }
1068 }
1069
1070 for p in builtin_profiles() {
1072 if config.profiles.get(&p.name).is_none() {
1073 config.profiles.insert_builtin(p);
1074 }
1075 }
1076
1077 for p in builtin_system_prompt_presets() {
1079 if config.system_prompt_presets.get(&p.name).is_none() {
1080 config.system_prompt_presets.insert_builtin(p);
1081 }
1082 }
1083 config
1084 }
1085
1086 fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1087 let content = std::fs::read_to_string(path)?;
1088 let config: Config = serde_yaml::from_str(&content)
1089 .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1090 let config = Self::normalize_config(config);
1091 let config = config.auto_detect_platform();
1092 let warnings = config.validate();
1093 if !warnings.is_empty() {
1094 eprintln!("Config validation warnings:");
1095 for warning in &warnings {
1096 eprintln!(" - {}", warning);
1097 }
1098 }
1099 Ok(config)
1100 }
1101
1102 pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1103 let path = Self::config_path();
1104 if path.exists() {
1105 Self::load_impl(&path)
1106 } else {
1107 let mut config = Config::default();
1108 config.save()?;
1109 Ok(config)
1110 }
1111 }
1112
1113 pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1114 if path.exists() {
1115 Self::load_impl(&path)
1116 } else {
1117 Err(format!("Config file not found: {}", path.display()).into())
1118 }
1119 }
1120
1121 fn auto_detect_platform(mut self) -> Self {
1123 if self.default.platform.is_none() {
1124 self.default.platform =
1125 Some(
1126 crate::backend::hardware::platform_name(
1127 crate::backend::hardware::detect_platform(),
1128 )
1129 .to_string(),
1130 );
1131 }
1132 self
1133 }
1134
1135 pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1136 let path = Self::config_path();
1137 if let Some(parent) = path.parent() {
1138 std::fs::create_dir_all(parent)?;
1139 }
1140 let content = serde_yaml::to_string(self)?;
1141 std::fs::write(&path, content)?;
1142 let entries: Vec<(String, ModelOverride)> = self
1144 .model_overrides
1145 .keys()
1146 .iter()
1147 .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1148 .collect();
1149 for (name, cfg) in entries {
1150 self.model_overrides.save(&name, &cfg);
1151 }
1152 for profile in self.profiles.user_profiles() {
1154 self.profiles.save(&profile);
1155 }
1156 for preset in self.system_prompt_presets.user_presets() {
1158 self.system_prompt_presets.save(&preset);
1159 }
1160 Ok(())
1161 }
1162
1163 pub fn merged_profiles(&self) -> Vec<Profile> {
1164 self.profiles.all()
1165 }
1166
1167 pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
1168 self.system_prompt_presets.all()
1169 }
1170}
1171
/// Severity of a log entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    Info,
    Warning,
    Error,
}

impl LogLevel {
    /// Upper-case text label for this level: `INFO`, `WARNING` or `ERROR`.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Info => "INFO",
            Self::Warning => "WARNING",
            Self::Error => "ERROR",
        }
    }
}
1188
/// A single log line.
#[derive(Debug, Clone)]
pub struct LogEntry {
    /// Creation time; `LogEntry::new` formats the local time as `%H:%M:%S`.
    pub timestamp: String,
    /// Severity of the entry.
    pub level: LogLevel,
    /// Message text; `LogEntry::new` passes it through `sanitize_log` first.
    pub message: String,
}
1195
1196impl LogEntry {
1197 pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1198 let timestamp = Local::now().format("%H:%M:%S").to_string();
1199 let message = sanitize_log(&message.into());
1200 Self {
1201 timestamp,
1202 level,
1203 message,
1204 }
1205 }
1206}
1207
/// Makes `input` safe to show as a single log message.
///
/// Steps, in order:
/// 1. keep at most the first 2000 characters,
/// 2. drop control characters other than newline and tab,
/// 3. replace each tab with a space,
/// 4. strip trailing whitespace,
/// 5. append `... (truncated)` when step 1 cut anything off.
fn sanitize_log(input: &str) -> String {
    const MAX_CHARS: usize = 2000;

    // A character at index MAX_CHARS exists only when the input is longer than the limit.
    let was_truncated = input.chars().nth(MAX_CHARS).is_some();

    let visible: String = input
        .chars()
        .take(MAX_CHARS)
        .filter(|&c| !c.is_control() || c == '\n' || c == '\t')
        .collect();
    let visible = visible.replace('\t', " ");

    let mut sanitized = String::from(visible.trim_end());
    if was_truncated {
        sanitized.push_str("... (truncated)");
    }
    sanitized
}