1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12pub use model_config::ModelConfigStore;
13
14pub use profiles::ProfileStore;
15
16use crate::models::{
17 Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
18};
19pub use presets::PresetStore;
20
21pub fn config_base_dir() -> PathBuf {
26 if let Some(d) = dirs::config_dir() {
27 return d;
28 }
29 if let Some(home) = dirs::home_dir() {
30 return home.join(".config");
31 }
32 PathBuf::from(".").join(".llm-manager")
33}
34
/// Counts physical CPU cores, always returning at least 1.
///
/// The count is the number of distinct `(physical id, core id)` pairs found
/// in `/proc/cpuinfo`. When that file cannot be read, or when it contains no
/// such pairs, the value of `std::thread::available_parallelism()` is used
/// instead (1 if that is unavailable too).
pub fn physical_cores() -> u32 {
    // Fallback used whenever /proc/cpuinfo gives no usable answer.
    let logical_cpus = || {
        std::thread::available_parallelism()
            .map(|p| p.get() as u32)
            .unwrap_or(1)
    };

    let Ok(content) = std::fs::read_to_string("/proc/cpuinfo") else {
        return logical_cpus();
    };

    let mut seen = HashSet::new();
    let mut cur_phys: Option<&str> = None;
    let mut cur_core: Option<&str> = None;
    for line in content.lines() {
        if line.trim().is_empty() {
            // A blank line separates processor stanzas. Forget the previous
            // stanza's ids so a new `physical id` is never paired with a
            // stale `core id`.
            cur_phys = None;
            cur_core = None;
            continue;
        }
        let Some((key, val)) = line.split_once(':') else {
            continue;
        };
        match key.trim() {
            "physical id" => cur_phys = Some(val.trim()),
            "core id" => cur_core = Some(val.trim()),
            // Other keys cannot complete a pair; nothing to record.
            _ => continue,
        }
        if let (Some(phys), Some(core)) = (cur_phys, cur_core) {
            seen.insert((phys, core));
        }
    }

    // Files without `physical id` / `core id` keys yield an empty set;
    // report the logical CPU count rather than zero cores.
    match seen.len() as u32 {
        0 => logical_cpus(),
        cores => cores,
    }
}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct RpcWorker {
69 #[serde(default)]
70 pub selected: bool,
71 #[serde(default)]
72 pub name: String,
73 pub ip: String,
74 #[serde(default = "default_rpc_port")]
75 pub port: u16,
76}
77
/// Serde fallback for `RpcWorker::port`.
fn default_rpc_port() -> u16 {
    const DEFAULT_RPC_PORT: u16 = 50_052;
    DEFAULT_RPC_PORT
}
81
82#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct Config {
85 pub models_dirs: Vec<PathBuf>,
86 pub llama_server: PathBuf,
87 pub default: DefaultParams,
88 #[serde(default, skip)]
90 pub model_overrides: ModelConfigStore,
91 #[serde(default, skip)]
93 pub profiles: ProfileStore,
94 #[serde(default, skip)]
96 pub system_prompt_presets: PresetStore,
97 #[serde(default)]
99 pub rpc_workers: Vec<RpcWorker>,
100 #[serde(default = "default_search_limit")]
102 pub search_limit: u32,
103}
104
/// Serde fallback for `Config::search_limit`.
fn default_search_limit() -> u32 {
    const DEFAULT_SEARCH_LIMIT: u32 = 50;
    DEFAULT_SEARCH_LIMIT
}
108
109#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
111pub struct Profile {
112 pub name: String,
113 pub description: String,
115 #[serde(default)]
117 pub settings: ModelOverride,
118}
119
120impl Profile {
121 pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
123 self.settings.apply(&mut base);
124 base
125 }
126}
127
128#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct SystemPromptPreset {
131 pub name: String,
132 pub description: String,
133 pub content: String,
134}
135
136pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
138 vec![
139 SystemPromptPreset {
140 name: "General".into(),
141 description: "General-purpose assistant".into(),
142 content: "You are a helpful assistant.".into(),
143 },
144 SystemPromptPreset {
145 name: "Coder".into(),
146 description: "Expert software developer".into(),
147 content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
148 },
149 SystemPromptPreset {
150 name: "Thinker".into(),
151 description: "Analytical and thoughtful".into(),
152 content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
153 },
154 SystemPromptPreset {
155 name: "Mathematician".into(),
156 description: "Expert in mathematics".into(),
157 content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
158 },
159 ]
160}
161
162#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
163pub struct ModelOverride {
164 pub context_length: Option<u32>,
166 pub batch_size: Option<u32>,
167 pub ubatch_size: Option<u32>,
168 pub cache_type_k: Option<CacheTypeK>,
169 pub cache_type_v: Option<CacheTypeV>,
170 pub keep: Option<i32>,
171 pub swa_full: Option<bool>,
172 pub mlock: Option<bool>,
173 pub mmap: Option<bool>,
174 pub numa: Option<NumMode>,
175 pub uniform_cache: Option<bool>,
176 pub system_prompt: Option<String>,
177 pub system_prompt_preset_name: Option<String>,
178 pub max_concurrent_predictions: Option<u32>,
179 pub threads: Option<u32>,
180 pub threads_batch: Option<u32>,
181 pub parallel: Option<u32>,
182
183 pub gpu_layers: Option<i32>,
185 pub split_mode: Option<SplitMode>,
186 pub tensor_split: Option<String>,
187 pub main_gpu: Option<i32>,
188 pub fit: Option<bool>,
189 pub lora: Option<PathBuf>,
190 pub lora_scaled: Option<(PathBuf, f32)>,
191 pub rpc: Option<String>,
192 pub embedding: Option<bool>,
193 pub kv_cache_offload: Option<bool>,
194 pub flash_attn: Option<bool>,
195 pub jinja: Option<bool>,
196 pub chat_template: Option<String>,
197 pub chat_template_kwargs: Option<String>,
198 pub expert_count: Option<i32>,
199 pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,
200
201 pub seed: Option<i32>,
203 pub temperature: Option<f32>,
204 pub top_k: Option<i32>,
205 pub top_p: Option<f32>,
206 pub min_p: Option<f32>,
207 pub typical_p: Option<f32>,
208 pub mirostat: Option<Mirostat>,
209 pub mirostat_lr: Option<f32>,
210 pub mirostat_ent: Option<f32>,
211 pub ignore_eos: Option<bool>,
212 pub samplers: Option<Samplers>,
213
214 pub repeat_penalty: Option<f32>,
216 pub repeat_last_n: Option<i32>,
217 pub presence_penalty: Option<f32>,
218 pub frequency_penalty: Option<f32>,
219 pub dry_multiplier: Option<f32>,
220 pub dry_base: Option<f32>,
221 pub dry_allowed_length: Option<i32>,
222 pub dry_penalty_last_n: Option<i32>,
223
224 pub rope_scaling: Option<RopeScaling>,
226 pub rope_scale: Option<f32>,
227 pub rope_freq_base: Option<f32>,
228 pub rope_freq_scale: Option<f32>,
229 pub rope_yarn_enabled: Option<bool>,
230
231 pub cache_prompt: Option<bool>,
233 pub cache_reuse: Option<u32>,
234 pub webui: Option<bool>,
235
236 pub max_tokens: Option<u32>,
238 pub cache_type: Option<CacheType>,
239 pub llama_cpp_version_cpu: Option<String>,
240 pub llama_cpp_version_vulkan: Option<String>,
241 pub llama_cpp_version_rocm: Option<String>,
242 pub llama_cpp_version_rocm_lemonade: Option<String>,
243 pub llama_cpp_version_cuda: Option<String>,
244 pub spec_type: Option<String>,
245 pub draft_tokens: Option<u32>,
246 pub tags: Option<Vec<String>>,
247}
248
/// For each listed field: when the source's `Option` holds a value, copy it
/// into the same-named field of the destination; otherwise leave the
/// destination untouched. Intended for `Copy` field types.
macro_rules! apply_scalar {
    ($src:ident, $dst:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $src.$field {
                $dst.$field = value;
            }
        )+
    };
}
257
/// For each listed field: when the source's `Option` holds a value, clone it
/// into the same-named (non-optional) field of the destination; otherwise
/// leave the destination untouched.
macro_rules! apply_clone {
    ($src:ident, $dst:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(ref value) = $src.$field {
                $dst.$field = value.clone();
            }
        )+
    };
}
268
/// For each listed field: when the source's `Option` holds a value, store a
/// clone of it as `Some(..)` in the same-named optional field of the
/// destination; a `None` source leaves the destination untouched.
macro_rules! apply_option {
    ($src:ident, $dst:ident, $($field:ident),+ $(,)?) => {
        $(
            if $src.$field.is_some() {
                $dst.$field = $src.$field.clone();
            }
        )+
    };
}
279
280impl ModelOverride {
281 pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
282 Self {
283 context_length: Some(s.context_length),
284 batch_size: Some(s.batch_size),
285 ubatch_size: Some(s.ubatch_size),
286 cache_type_k: s.cache_type_k,
287 cache_type_v: s.cache_type_v,
288 keep: Some(s.keep),
289 swa_full: Some(s.swa_full),
290 mlock: Some(s.mlock),
291 mmap: Some(s.mmap),
292 numa: Some(s.numa),
293 uniform_cache: Some(s.uniform_cache),
294 system_prompt: Some(s.system_prompt.clone()),
295 system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
296 max_concurrent_predictions: s.max_concurrent_predictions,
297 threads: Some(s.threads),
298 threads_batch: Some(s.threads_batch),
299 parallel: Some(s.parallel),
300 gpu_layers: Some(match s.gpu_layers_mode {
301 crate::models::GpuLayersMode::Auto => 0,
302 crate::models::GpuLayersMode::Specific(n) => n as i32,
303 crate::models::GpuLayersMode::All => -1,
304 }),
305 gpu_layers_mode: Some(s.gpu_layers_mode),
306 split_mode: Some(s.split_mode),
307 tensor_split: Some(s.tensor_split.clone()),
308 main_gpu: Some(s.main_gpu),
309 fit: Some(s.fit),
310 lora: s.lora.clone(),
311 lora_scaled: s.lora_scaled.clone(),
312 rpc: Some(s.rpc.clone()),
313 embedding: Some(s.embedding),
314 kv_cache_offload: Some(s.kv_cache_offload),
315 flash_attn: Some(s.flash_attn),
316 jinja: Some(s.jinja),
317 chat_template: s.chat_template.clone(),
318 chat_template_kwargs: s.chat_template_kwargs.clone(),
319 expert_count: Some(s.expert_count),
320 seed: Some(s.seed),
321 temperature: Some(s.temperature),
322 top_k: Some(s.top_k),
323 top_p: Some(s.top_p),
324 min_p: Some(s.min_p),
325 typical_p: Some(s.typical_p),
326 mirostat: Some(s.mirostat),
327 mirostat_lr: Some(s.mirostat_lr),
328 mirostat_ent: Some(s.mirostat_ent),
329 ignore_eos: Some(s.ignore_eos),
330 samplers: Some(s.samplers.clone()),
331 repeat_penalty: Some(s.repeat_penalty),
332 repeat_last_n: Some(s.repeat_last_n),
333 presence_penalty: s.presence_penalty,
334 frequency_penalty: s.frequency_penalty,
335 dry_multiplier: Some(s.dry_multiplier),
336 dry_base: Some(s.dry_base),
337 dry_allowed_length: Some(s.dry_allowed_length),
338 dry_penalty_last_n: Some(s.dry_penalty_last_n),
339 rope_scaling: Some(s.rope_scaling),
340 rope_scale: Some(s.rope_scale),
341 rope_freq_base: Some(s.rope_freq_base),
342 rope_freq_scale: Some(s.rope_freq_scale),
343 rope_yarn_enabled: Some(s.rope_yarn_enabled),
344 cache_prompt: Some(s.cache_prompt),
345 cache_reuse: Some(s.cache_reuse),
346 webui: Some(s.webui),
347 max_tokens: s.max_tokens,
348 cache_type: Some(s.cache_type),
349 llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
350 llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
351 llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
352 llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
353 llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
354 spec_type: Some(s.spec_type.clone()),
355 draft_tokens: Some(s.draft_tokens),
356 tags: Some(s.tags.clone()),
357 }
358 }
359
360 pub fn apply(&self, base: &mut crate::models::ModelSettings) {
362 apply_scalar!(self, base,
367 context_length, batch_size, ubatch_size, keep, swa_full, mlock, mmap,
368 numa, uniform_cache, kv_cache_offload, threads, threads_batch, parallel,
369 split_mode, main_gpu, fit, embedding, flash_attn, jinja, expert_count,
370 seed, temperature, top_k, top_p, min_p, typical_p,
371 mirostat, mirostat_lr, mirostat_ent, ignore_eos,
372 repeat_penalty, repeat_last_n,
373 dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
374 rope_scaling, rope_scale, rope_freq_base, rope_freq_scale, rope_yarn_enabled,
375 cache_prompt, cache_reuse, webui, cache_type,
376 draft_tokens, gpu_layers_mode,
377 );
378
379 apply_clone!(self, base,
381 system_prompt, system_prompt_preset_name, tensor_split, rpc,
382 samplers, spec_type, tags,
383 );
384
385 apply_option!(self, base,
387 lora, lora_scaled, chat_template, chat_template_kwargs,
388 llama_cpp_version_cpu, llama_cpp_version_vulkan,
389 llama_cpp_version_rocm, llama_cpp_version_rocm_lemonade,
390 llama_cpp_version_cuda,
391 );
392
393 base.cache_type_k = self.cache_type_k;
395 base.cache_type_v = self.cache_type_v;
396 base.presence_penalty = self.presence_penalty;
397 base.frequency_penalty = self.frequency_penalty;
398 base.max_tokens = self.max_tokens;
399
400 base.max_concurrent_predictions = self
402 .max_concurrent_predictions
403 .or(base.max_concurrent_predictions);
404
405 if let Some(n) = self.gpu_layers {
408 base.gpu_layers_mode = match n {
409 n if n < 0 => crate::models::GpuLayersMode::All,
410 n => crate::models::GpuLayersMode::Specific(n as u32),
411 };
412 }
413
414 }
430}
431
432pub fn builtin_profiles() -> Vec<Profile> {
434 vec![
435 Profile {
436 name: "Qwen".into(),
437 description: "Optimized for Qwen models (dense)".into(),
438 settings: ModelOverride {
439 context_length: Some(131072),
440 temperature: Some(0.7),
441 top_k: Some(20),
442 top_p: Some(0.95),
443 max_tokens: Some(4096),
444 presence_penalty: Some(0.0),
445 uniform_cache: Some(true),
446 jinja: Some(true),
447 ..Default::default()
448 },
449 },
450 Profile {
451 name: "Qwen-MoE".into(),
452 description: "Optimized for Qwen MoE models (35B-A3B)".into(),
453 settings: ModelOverride {
454 context_length: Some(131072),
455 temperature: Some(0.8),
456 top_k: Some(20),
457 top_p: Some(0.95),
458 max_tokens: Some(4096),
459 presence_penalty: Some(1.5),
460 uniform_cache: Some(true),
461 jinja: Some(true),
462 ..Default::default()
463 },
464 },
465 Profile {
466 name: "Qwen-Coding".into(),
467 description: "Optimized for Qwen models in coding mode".into(),
468 settings: ModelOverride {
469 context_length: Some(131072),
470 temperature: Some(0.6),
471 top_k: Some(20),
472 top_p: Some(0.95),
473 max_tokens: Some(4096),
474 presence_penalty: Some(0.0),
475 uniform_cache: Some(true),
476 jinja: Some(true),
477 ..Default::default()
478 },
479 },
480 Profile {
481 name: "Gemma".into(),
482 description: "Optimized for Gemma 2/4 models".into(),
483 settings: ModelOverride {
484 context_length: Some(131072),
485 min_p: Some(0.1),
486 temperature: Some(1.0),
487 top_k: Some(65),
488 top_p: Some(0.95),
489 max_tokens: Some(4096),
490 uniform_cache: Some(true),
491 jinja: Some(true),
492 ..Default::default()
493 },
494 },
495 Profile {
496 name: "Llama".into(),
497 description: "Optimized for Llama 3.1/3.3 models".into(),
498 settings: ModelOverride {
499 context_length: Some(131072),
500 temperature: Some(0.7),
501 top_p: Some(0.9),
502 repeat_penalty: Some(1.1),
503 max_tokens: Some(4096),
504 uniform_cache: Some(true),
505 jinja: Some(true),
506 ..Default::default()
507 },
508 },
509 Profile {
510 name: "Mistral".into(),
511 description: "Optimized for Mistral 7B/NeMo models".into(),
512 settings: ModelOverride {
513 context_length: Some(131072),
514 temperature: Some(0.7),
515 top_k: Some(50),
516 top_p: Some(0.9),
517 max_tokens: Some(4096),
518 uniform_cache: Some(true),
519 jinja: Some(true),
520 ..Default::default()
521 },
522 },
523 Profile {
524 name: "Phi".into(),
525 description: "Optimized for Phi 3.5 Mini models".into(),
526 settings: ModelOverride {
527 context_length: Some(131072),
528 temperature: Some(0.7),
529 top_k: Some(50),
530 top_p: Some(0.9),
531 repeat_penalty: Some(1.1),
532 max_tokens: Some(4096),
533 uniform_cache: Some(true),
534 ..Default::default()
535 },
536 },
537 ]
538}
539
540#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
541#[serde(default)]
542pub struct DefaultParams {
543 #[serde(default)]
545 pub context_length: u32,
546 #[serde(default)]
547 pub threads: u32,
548 #[serde(default)]
549 pub threads_batch: u32,
550 #[serde(default)]
551 pub batch_size: u32,
552 #[serde(default)]
553 pub ubatch_size: u32,
554 #[serde(default = "default_cache_type_k")]
555 pub cache_type_k: Option<CacheTypeK>,
556 #[serde(default = "default_cache_type_v")]
557 pub cache_type_v: Option<CacheTypeV>,
558 #[serde(default)]
559 pub keep: i32,
560 #[serde(default)]
561 pub swa_full: bool,
562 #[serde(default)]
563 pub mlock: bool,
564 #[serde(default)]
565 pub mmap: bool,
566 #[serde(default)]
567 pub numa: NumMode,
568 #[serde(default)]
569 pub uniform_cache: bool,
570 #[serde(default)]
571 pub kv_cache_offload: bool,
572 #[serde(default)]
573 pub parallel: u32,
574 #[serde(default)]
575 pub max_concurrent_predictions: Option<u32>,
576 #[serde(default)]
577 pub system_prompt: String,
578 #[serde(default = "default_system_prompt_preset_name")]
579 pub system_prompt_preset_name: String,
580 #[serde(default)]
582 pub gpu_layers: i32,
583 #[serde(default = "default_gpu_layers_mode")]
584 pub gpu_layers_mode: crate::models::GpuLayersMode,
585 #[serde(default)]
586 pub split_mode: SplitMode,
587 #[serde(default)]
588 pub tensor_split: String,
589 #[serde(default)]
590 pub main_gpu: i32,
591 #[serde(default)]
592 pub fit: bool,
593 #[serde(default)]
594 pub lora: Option<PathBuf>,
595 #[serde(default)]
596 pub lora_scaled: Option<(PathBuf, f32)>,
597 #[serde(default)]
598 pub rpc: String,
599 #[serde(default)]
600 pub embedding: bool,
601 #[serde(default)]
602 pub flash_attn: bool,
603 #[serde(default)]
604 pub jinja: bool,
605 #[serde(default)]
606 pub chat_template: Option<String>,
607 #[serde(default)]
608 pub chat_template_kwargs: Option<String>,
609 #[serde(default)]
610 pub expert_count: i32,
611
612 #[serde(default)]
614 pub seed: i32,
615 #[serde(default)]
616 pub temperature: f32,
617 #[serde(default)]
618 pub top_k: i32,
619 #[serde(default)]
620 pub top_p: f32,
621 #[serde(default)]
622 pub min_p: f32,
623 #[serde(default)]
624 pub typical_p: f32,
625 #[serde(default)]
626 pub mirostat: Mirostat,
627 #[serde(default)]
628 pub mirostat_lr: f32,
629 #[serde(default)]
630 pub mirostat_ent: f32,
631 #[serde(default)]
632 pub ignore_eos: bool,
633 #[serde(default)]
634 pub samplers: Samplers,
635
636 #[serde(default)]
638 pub repeat_penalty: f32,
639 #[serde(default)]
640 pub repeat_last_n: i32,
641 #[serde(default = "default_presence_penalty")]
642 pub presence_penalty: Option<f32>,
643 #[serde(default = "default_frequency_penalty")]
644 pub frequency_penalty: Option<f32>,
645 #[serde(default)]
646 pub dry_multiplier: f32,
647 #[serde(default)]
648 pub dry_base: f32,
649 #[serde(default)]
650 pub dry_allowed_length: i32,
651 #[serde(default)]
652 pub dry_penalty_last_n: i32,
653
654 #[serde(default)]
656 pub rope_scaling: RopeScaling,
657 #[serde(default)]
658 pub rope_scale: f32,
659 #[serde(default)]
660 pub rope_freq_base: f32,
661 #[serde(default)]
662 pub rope_freq_scale: f32,
663 #[serde(default)]
664 pub rope_yarn_enabled: bool,
665
666 #[serde(default)]
668 pub host: String,
669 #[serde(default)]
670 pub port: u16,
671 #[serde(default)]
672 pub timeout: u32,
673 #[serde(default = "default_cache_prompt")]
674 pub cache_prompt: bool,
675 #[serde(default)]
676 pub cache_reuse: u32,
677 #[serde(default)]
678 pub webui: bool,
679 #[serde(default)]
680 pub ws_server_enabled: bool,
681 #[serde(default = "default_ws_server_port")]
682 pub ws_server_port: u16,
683 #[serde(default)]
684 pub ws_server_auth_key: Option<String>,
685 #[serde(default = "default_ws_server_tls_enabled")]
686 pub ws_server_tls_enabled: bool,
687 #[serde(default)]
688 pub ws_server_tls_cert: Option<String>,
689 #[serde(default)]
690 pub ws_server_tls_key: Option<String>,
691 #[serde(default)]
692 pub router_max_models: u32,
693 #[serde(default)]
694 pub server_mode: crate::models::ServerMode,
695
696 #[serde(default = "default_max_tokens")]
698 pub max_tokens: Option<u32>,
699 #[serde(default)]
700 pub cache_type: CacheType,
701 #[serde(default)]
702 pub backend: Backend,
703 #[serde(default)]
705 pub platform: Option<String>,
706 #[serde(default)]
707 pub llama_cpp_version_cpu: Option<String>,
708 #[serde(default)]
709 pub llama_cpp_version_vulkan: Option<String>,
710 #[serde(default)]
711 pub llama_cpp_version_rocm: Option<String>,
712 #[serde(default)]
713 pub llama_cpp_version_rocm_lemonade: Option<String>,
714 #[serde(default)]
715 pub llama_cpp_version_cuda: Option<String>,
716
717 #[serde(default)]
719 pub api_endpoint_enabled: bool,
720 #[serde(default = "default_api_endpoint_port")]
721 pub api_endpoint_port: u16,
722 #[serde(default)]
723 pub spec_type: String,
724 #[serde(default)]
725 pub draft_tokens: u32,
726 #[serde(default)]
727 pub tags: Vec<String>,
728}
729
/// Serde fallback for `DefaultParams::api_endpoint_port`.
fn default_api_endpoint_port() -> u16 {
    const DEFAULT_API_ENDPOINT_PORT: u16 = 49_222;
    DEFAULT_API_ENDPOINT_PORT
}
733
/// Serde fallback for `DefaultParams::system_prompt_preset_name`.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
737
738fn default_cache_type_k() -> Option<CacheTypeK> {
739 None
740}
741fn default_cache_type_v() -> Option<CacheTypeV> {
742 None
743}
/// Serde fallback for `DefaultParams::presence_penalty`: no value.
fn default_presence_penalty() -> Option<f32> {
    Option::None
}
/// Serde fallback for `DefaultParams::frequency_penalty`: no value.
fn default_frequency_penalty() -> Option<f32> {
    Option::None
}
/// Serde fallback for `DefaultParams::max_tokens`: no value.
fn default_max_tokens() -> Option<u32> {
    Option::None
}
/// Serde fallback for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    const CACHE_PROMPT_BY_DEFAULT: bool = true;
    CACHE_PROMPT_BY_DEFAULT
}
/// Serde fallback for `DefaultParams::ws_server_port`.
fn default_ws_server_port() -> u16 {
    const DEFAULT_WS_SERVER_PORT: u16 = 49_223;
    DEFAULT_WS_SERVER_PORT
}
/// Serde fallback for `DefaultParams::ws_server_tls_enabled`: enabled.
fn default_ws_server_tls_enabled() -> bool {
    const TLS_BY_DEFAULT: bool = true;
    TLS_BY_DEFAULT
}
762 fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
763 crate::models::GpuLayersMode::Auto
764}
765
766impl Default for DefaultParams {
767 fn default() -> Self {
768 Self {
769 context_length: 131072,
771 threads: physical_cores(),
772 threads_batch: 8,
773 batch_size: 512,
774 ubatch_size: 512,
775 cache_type_k: None,
776 cache_type_v: None,
777 keep: 0,
778 swa_full: false,
779 mlock: false,
780 mmap: true,
781 numa: NumMode::None,
782 uniform_cache: true,
783 kv_cache_offload: true,
784 parallel: 1,
785 max_concurrent_predictions: None,
786 system_prompt: "You are a helpful assistant.".to_string(),
787 system_prompt_preset_name: "General".to_string(),
788
789 gpu_layers: -1,
791 gpu_layers_mode: crate::models::GpuLayersMode::Auto,
792 split_mode: SplitMode::Layer,
793 tensor_split: String::new(),
794 main_gpu: 0,
795 fit: true,
796 lora: None,
797 lora_scaled: None,
798 rpc: String::new(),
799 embedding: false,
800 flash_attn: true,
801 jinja: true,
802 chat_template: None,
803 chat_template_kwargs: None,
804 expert_count: -1,
805
806 seed: -1,
808 temperature: 0.8,
809 top_k: 40,
810 top_p: 0.95,
811 min_p: 0.0,
812 typical_p: 1.0,
813 mirostat: Mirostat::Off,
814 mirostat_lr: 0.1,
815 mirostat_ent: 5.0,
816 ignore_eos: false,
817 samplers: Samplers::default(),
818
819 repeat_penalty: 1.1,
821 repeat_last_n: 64,
822 presence_penalty: None,
823 frequency_penalty: None,
824 dry_multiplier: 0.0,
825 dry_base: 1.75,
826 dry_allowed_length: 2,
827 dry_penalty_last_n: -1,
828
829 rope_scaling: RopeScaling::None,
831 rope_scale: 1.0,
832 rope_freq_base: 0.0,
833 rope_freq_scale: 1.0,
834 rope_yarn_enabled: false,
835
836 host: "127.0.0.1".to_string(),
838 port: 8080,
839 timeout: 600,
840 cache_prompt: true,
841 cache_reuse: 0,
842 webui: false,
843 ws_server_enabled: false,
844 ws_server_port: 49223,
845 ws_server_auth_key: None,
846 ws_server_tls_enabled: true,
847 ws_server_tls_cert: None,
848 ws_server_tls_key: None,
849 router_max_models: 4,
850 server_mode: crate::models::ServerMode::Normal,
851
852 max_tokens: None,
854 cache_type: CacheType::F16,
855 backend: {
856 use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
857 let vendors = detect_gpu_vendors();
858 let mut result = Backend::Cpu;
859 for v in &vendors {
860 if matches!(v, GpuVendor::Nvidia) {
861 result = Backend::Cuda;
862 break;
863 }
864 if matches!(v, GpuVendor::Amd) {
865 result = Backend::Rocm;
866 break;
867 }
868 if matches!(v, GpuVendor::Intel) {
869 result = Backend::Vulkan;
870 break;
871 }
872 }
873 result
874 },
875 platform: None,
876 llama_cpp_version_cpu: None,
877 llama_cpp_version_vulkan: None,
878 llama_cpp_version_rocm: None,
879 llama_cpp_version_rocm_lemonade: None,
880 llama_cpp_version_cuda: None,
881 api_endpoint_enabled: false,
882 api_endpoint_port: 49222,
883 spec_type: String::new(),
884 draft_tokens: 0,
885 tags: Vec::new(),
886 }
887 }
888}
889
890impl Default for Config {
891 fn default() -> Self {
892 Self {
893 models_dirs: vec![
894 dirs::data_dir()
895 .unwrap_or_default()
896 .join("llm-manager")
897 .join("models"),
898 ],
899 llama_server: "llama-server".into(),
900 default: DefaultParams::default(),
901 model_overrides: Default::default(),
902 profiles: Default::default(),
903 system_prompt_presets: Default::default(),
904 rpc_workers: Vec::new(),
905 search_limit: default_search_limit(),
906 }
907 }
908}
909
910impl Config {
911 pub fn config_path() -> PathBuf {
912 config_base_dir()
913 .join("llm-manager")
914 .join("config.yaml")
915 }
916
917 pub fn validate(&self) -> Vec<String> {
919 let mut warnings = Vec::new();
920 let default = &self.default;
921
922 if default.context_length < 512 || default.context_length > 131072 {
924 warnings.push(format!(
925 "context_length {} is outside recommended range 512-131072",
926 default.context_length
927 ));
928 }
929 if default.temperature < 0.0 || default.temperature > 2.0 {
930 warnings.push(format!(
931 "temperature {} is outside recommended range 0.0-2.0",
932 default.temperature
933 ));
934 }
935 if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
936 warnings.push(format!(
937 "top_p {} is outside recommended range 0.0-1.0",
938 default.top_p
939 ));
940 }
941 if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
942 && default.repeat_penalty != 1.0
943 {
944 warnings.push(format!(
945 "repeat_penalty {} is outside recommended range 0.0-3.0",
946 default.repeat_penalty
947 ));
948 }
949 if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
950 warnings.push(format!(
951 "mirostat_lr {} is outside recommended range 0.0-1.0",
952 default.mirostat_lr
953 ));
954 }
955 if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
956 warnings.push(format!(
957 "mirostat_ent {} is outside recommended range 0.0-10.0",
958 default.mirostat_ent
959 ));
960 }
961
962 if default.timeout < 1 {
963 warnings.push(format!(
964 "timeout {} must be at least 1 second",
965 default.timeout
966 ));
967 }
968
969 if let Some(lora) = &default.lora
971 && !lora.exists() {
972 warnings.push(format!("lora path {} does not exist", lora.display()));
973 }
974 if let Some((lora, _)) = &default.lora_scaled
975 && !lora.exists() {
976 warnings.push(format!("lora path {} does not exist", lora.display()));
977 }
978
979 for model_name in self.model_overrides.keys() {
981 if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
982 if let Some(lora) = &override_settings.lora
983 && !lora.exists() {
984 warnings.push(format!(
985 "model '{}' lora path {} does not exist",
986 model_name,
987 lora.display()
988 ));
989 }
990 if let Some((lora, _)) = &override_settings.lora_scaled
991 && !lora.exists() {
992 warnings.push(format!(
993 "model '{}' lora path {} does not exist",
994 model_name,
995 lora.display()
996 ));
997 }
998 }
999 }
1000
1001 warnings
1002 }
1003
1004 pub fn resolve_settings(
1006 &self,
1007 model_name: Option<&str>,
1008 profile_name: Option<&str>,
1009 ) -> crate::models::ModelSettings {
1010 let mut settings = crate::models::ModelSettings::from_config(self);
1011
1012 if let Some(name) = model_name
1014 && let Some(override_settings) = self.model_overrides.get(name)
1015 {
1016 override_settings.apply(&mut settings);
1017 }
1018
1019 if let Some(p_name) = profile_name {
1021 if let Some(profile) = self.profiles.get(p_name) {
1022 profile.settings.apply(&mut settings);
1023 } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1024 profile.settings.apply(&mut settings);
1025 }
1026 }
1027
1028 settings
1029 }
1030
1031 pub fn get_preset_content(&self, name: &str) -> Option<String> {
1033 self.system_prompt_presets
1034 .get(name)
1035 .map(|p| p.content.clone())
1036 }
1037
1038 fn normalize_config(mut config: Config) -> Config {
1039 for path in &mut config.models_dirs {
1041 let path_str = path.to_string_lossy();
1042 if let Some(stripped) = path_str.strip_prefix("~/") {
1043 let home = dirs::home_dir().unwrap_or_default();
1044 *path = home.join(stripped);
1045 } else if !path.is_absolute() {
1046 let home = dirs::home_dir().unwrap_or_default();
1047 *path = home.join(path_str.as_ref());
1048 }
1049 }
1050
1051 for p in builtin_profiles() {
1053 if config.profiles.get(&p.name).is_none() {
1054 config.profiles.insert_builtin(p);
1055 }
1056 }
1057
1058 for p in builtin_system_prompt_presets() {
1060 if config.system_prompt_presets.get(&p.name).is_none() {
1061 config.system_prompt_presets.insert_builtin(p);
1062 }
1063 }
1064 config
1065 }
1066
1067 fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1068 let content = std::fs::read_to_string(path)?;
1069 let config: Config = serde_yaml::from_str(&content)
1070 .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1071 let config = Self::normalize_config(config);
1072 let config = config.auto_detect_platform();
1073 let warnings = config.validate();
1074 if !warnings.is_empty() {
1075 eprintln!("Config validation warnings:");
1076 for warning in &warnings {
1077 eprintln!(" - {}", warning);
1078 }
1079 }
1080 Ok(config)
1081 }
1082
1083 pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1084 let path = Self::config_path();
1085 if path.exists() {
1086 Self::load_impl(&path)
1087 } else {
1088 let mut config = Config::default();
1089 config.save()?;
1090 Ok(config)
1091 }
1092 }
1093
1094 pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1095 if path.exists() {
1096 Self::load_impl(&path)
1097 } else {
1098 Err(format!("Config file not found: {}", path.display()).into())
1099 }
1100 }
1101
1102 fn auto_detect_platform(mut self) -> Self {
1104 if self.default.platform.is_none() {
1105 self.default.platform =
1106 Some(
1107 crate::backend::hardware::platform_name(
1108 crate::backend::hardware::detect_platform(),
1109 )
1110 .to_string(),
1111 );
1112 }
1113 self
1114 }
1115
1116 pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1117 let path = Self::config_path();
1118 if let Some(parent) = path.parent() {
1119 std::fs::create_dir_all(parent)?;
1120 }
1121 let content = serde_yaml::to_string(self)?;
1122 std::fs::write(&path, content)?;
1123 let entries: Vec<(String, ModelOverride)> = self
1125 .model_overrides
1126 .keys()
1127 .iter()
1128 .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1129 .collect();
1130 for (name, cfg) in entries {
1131 self.model_overrides.save(&name, &cfg);
1132 }
1133 for profile in self.profiles.user_profiles() {
1135 self.profiles.save(&profile);
1136 }
1137 for preset in self.system_prompt_presets.user_presets() {
1139 self.system_prompt_presets.save(&preset);
1140 }
1141 Ok(())
1142 }
1143
1144 pub fn merged_profiles(&self) -> Vec<Profile> {
1145 self.profiles.all()
1146 }
1147
1148 pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
1149 self.system_prompt_presets.all()
1150 }
1151}
1152
/// Severity attached to a log entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    Info,
    Warning,
    Error,
}

impl LogLevel {
    /// Upper-case text label for the level: `INFO`, `WARNING` or `ERROR`.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Info => "INFO",
            Self::Warning => "WARNING",
            Self::Error => "ERROR",
        }
    }
}
1169
1170#[derive(Debug, Clone)]
1171pub struct LogEntry {
1172 pub timestamp: String,
1173 pub level: LogLevel,
1174 pub message: String,
1175}
1176
1177impl LogEntry {
1178 pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1179 let timestamp = Local::now().format("%H:%M:%S").to_string();
1180 let message = sanitize_log(&message.into());
1181 Self {
1182 timestamp,
1183 level,
1184 message,
1185 }
1186 }
1187}
1188
/// Cleans up text before it is stored in a log entry.
///
/// * Only the first 2000 characters of `input` are considered.
/// * Control characters are dropped, except that `\n` is kept and `\t` is
///   replaced by a space.
/// * Trailing whitespace is removed.
/// * When `input` was longer than 2000 characters, `... (truncated)` is
///   appended.
fn sanitize_log(input: &str) -> String {
    const MAX_CHARS: usize = 2000;

    let mut remaining = input.chars();
    let mut cleaned = String::new();
    for ch in remaining.by_ref().take(MAX_CHARS) {
        match ch {
            '\t' => cleaned.push(' '),
            '\n' => cleaned.push('\n'),
            other if other.is_control() => {}
            other => cleaned.push(other),
        }
    }
    // Anything left in the iterator means the input exceeded the limit.
    let was_truncated = remaining.next().is_some();

    let kept_len = cleaned.trim_end().len();
    cleaned.truncate(kept_len);
    if was_truncated {
        cleaned.push_str("... (truncated)");
    }
    cleaned
}