1mod model_config;
2mod presets;
3mod profiles;
4mod store;
5
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9use chrono::Local;
10use serde::{Deserialize, Serialize};
11
12pub use model_config::ModelConfigStore;
13
14pub use profiles::ProfileStore;
15
16use crate::models::{
17 Backend, CacheType, CacheTypeK, CacheTypeV, Mirostat, NumMode, RopeScaling, Samplers, SplitMode,
18};
19pub use presets::PresetStore;
20
21pub fn config_base_dir() -> PathBuf {
26 if let Some(d) = dirs::config_dir() {
27 return d;
28 }
29 if let Some(home) = dirs::home_dir() {
30 return home.join(".config");
31 }
32 PathBuf::from(".").join(".llm-manager")
33}
34
/// Returns the number of physical CPU cores, never less than 1.
///
/// The count is taken from `/proc/cpuinfo` by collecting the distinct
/// `(physical id, core id)` pairs. When the file cannot be read, or when it
/// contains no such pairs (the keys are absent on some platforms), the
/// function falls back to `std::thread::available_parallelism()` and finally
/// to `1`, so callers never receive a thread count of zero.
pub fn physical_cores() -> u32 {
    /// Fallback: the parallelism reported by the standard library.
    fn logical_cpus() -> u32 {
        std::thread::available_parallelism()
            .map(|p| p.get() as u32)
            .unwrap_or(1)
    }

    let Ok(content) = std::fs::read_to_string("/proc/cpuinfo") else {
        return logical_cpus();
    };

    let mut seen: HashSet<(&str, &str)> = HashSet::new();
    let mut cur_phys: Option<&str> = None;
    let mut cur_core: Option<&str> = None;

    for line in content.lines() {
        let Some((key, val)) = line.split_once(':') else {
            // A blank line ends the current processor stanza; forget its ids
            // so they cannot be paired with values from the next stanza.
            if line.trim().is_empty() {
                cur_phys = None;
                cur_core = None;
            }
            continue;
        };

        match key.trim() {
            // A new stanza starts: drop anything left over from the last one.
            "processor" => {
                cur_phys = None;
                cur_core = None;
                continue;
            }
            "physical id" => cur_phys = Some(val.trim()),
            "core id" => cur_core = Some(val.trim()),
            _ => continue,
        }

        if let (Some(phys), Some(core)) = (cur_phys, cur_core) {
            seen.insert((phys, core));
        }
    }

    match seen.len() {
        // No usable topology information: do not report zero cores.
        0 => logical_cpus(),
        n => u32::try_from(n).unwrap_or(u32::MAX),
    }
}
65
/// One entry of `Config::rpc_workers`.
///
/// Only `ip` is required when deserializing; every other field has a serde
/// default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RpcWorker {
    /// Defaults to `false` when absent.
    #[serde(default)]
    pub selected: bool,
    /// Defaults to the empty string when absent.
    #[serde(default)]
    pub name: String,
    /// Required: there is no serde default for this field.
    pub ip: String,
    /// Defaults to the value of `default_rpc_port()` when absent.
    #[serde(default = "default_rpc_port")]
    pub port: u16,
}
77
/// Serde default for `RpcWorker::port`.
fn default_rpc_port() -> u16 {
    const DEFAULT_RPC_PORT: u16 = 50052;
    DEFAULT_RPC_PORT
}
81
82#[derive(Debug, Clone, Serialize, Deserialize, Default)]
84pub struct WsServer {
85 #[serde(default)]
86 pub enabled: bool,
87 #[serde(default = "default_ws_port")]
88 pub port: u16,
89 #[serde(default)]
90 pub auth_key: Option<String>,
91 #[serde(default = "default_ws_host")]
92 pub host: String,
93 #[serde(default)]
94 pub tls_enabled: bool,
95 #[serde(default)]
96 pub tls_cert: Option<String>,
97 #[serde(default)]
98 pub tls_key: Option<String>,
99}
100
101fn default_ws_host() -> String {
102 "0.0.0.0".to_string()
103}
104
105fn default_ws_port() -> u16 {
106 49223
107}
108
/// Top-level application configuration, (de)serialized as YAML by
/// `Config::load` / `Config::save`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Required when deserializing (no serde default).
    pub models_dirs: Vec<PathBuf>,
    /// Required when deserializing (no serde default).
    pub llama_server: PathBuf,
    /// Required when deserializing; individual parameters inside have defaults.
    pub default: DefaultParams,
    // The three stores below are `skip`ped by serde, so they are never read
    // from or written to the YAML document; `Config::save` persists their
    // contents through the stores' own `save` methods.
    #[serde(default, skip)]
    pub model_overrides: ModelConfigStore,
    #[serde(default, skip)]
    pub profiles: ProfileStore,
    #[serde(default, skip)]
    pub system_prompt_presets: PresetStore,
    /// Defaults to an empty list when absent.
    #[serde(default)]
    pub rpc_workers: Vec<RpcWorker>,
    /// Defaults to `WsServer::default()` when absent.
    #[serde(default)]
    pub ws_server: WsServer,
    /// Defaults to `default_search_limit()` when absent.
    #[serde(default = "default_search_limit")]
    pub search_limit: u32,
}
134
/// Default for `Config::search_limit`, used by serde and `Config::default`.
fn default_search_limit() -> u32 {
    const DEFAULT_SEARCH_LIMIT: u32 = 50;
    DEFAULT_SEARCH_LIMIT
}
138
/// A named bundle of setting overrides.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Profile {
    pub name: String,
    pub description: String,
    /// Overrides carried by this profile; defaults to an all-`None`
    /// `ModelOverride` when the key is absent.
    #[serde(default)]
    pub settings: ModelOverride,
}
149
150impl Profile {
151 pub fn apply(&self, mut base: crate::models::ModelSettings) -> crate::models::ModelSettings {
153 self.settings.apply(&mut base);
154 base
155 }
156}
157
/// A named system prompt that can be selected by name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemPromptPreset {
    /// Name used to look the preset up.
    pub name: String,
    /// Short human-readable summary of the preset.
    pub description: String,
    /// The system prompt text itself.
    pub content: String,
}
165
166pub fn builtin_system_prompt_presets() -> Vec<SystemPromptPreset> {
168 vec![
169 SystemPromptPreset {
170 name: "General".into(),
171 description: "General-purpose assistant".into(),
172 content: "You are a helpful assistant.".into(),
173 },
174 SystemPromptPreset {
175 name: "Coder".into(),
176 description: "Expert software developer".into(),
177 content: "You are an expert software developer. Write clean, well-documented code. Explain your reasoning and suggest improvements.".into(),
178 },
179 SystemPromptPreset {
180 name: "Thinker".into(),
181 description: "Analytical and thoughtful".into(),
182 content: "You are a thoughtful and analytical AI assistant. Think carefully before answering. Provide well-reasoned responses with clear explanations.".into(),
183 },
184 SystemPromptPreset {
185 name: "Mathematician".into(),
186 description: "Expert in mathematics".into(),
187 content: "You are an expert in mathematics. Provide clear, step-by-step solutions to mathematical problems. Show your reasoning and explain key concepts.".into(),
188 },
189 ]
190}
191
/// A sparse set of setting overrides.
///
/// Every field is an `Option`; `ModelOverride::apply` copies values onto a
/// `ModelSettings`. The derived `Default` is the all-`None` override.
/// Field order is kept stable because it determines the order of keys in the
/// serialized output.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
pub struct ModelOverride {
    // --- Context, batching, memory and threading ---
    pub context_length: Option<u32>,
    pub batch_size: Option<u32>,
    pub ubatch_size: Option<u32>,
    pub cache_type_k: Option<CacheTypeK>,
    pub cache_type_v: Option<CacheTypeV>,
    pub keep: Option<i32>,
    pub swa_full: Option<bool>,
    pub mlock: Option<bool>,
    pub mmap: Option<bool>,
    pub numa: Option<NumMode>,
    pub uniform_cache: Option<bool>,
    pub system_prompt: Option<String>,
    pub system_prompt_preset_name: Option<String>,
    pub max_concurrent_predictions: Option<u32>,
    pub threads: Option<u32>,
    pub threads_batch: Option<u32>,
    pub parallel: Option<u32>,

    // --- GPU placement, adapters and chat template ---
    /// Numeric layer count. `apply` maps a negative value to
    /// `GpuLayersMode::All` and any other value to `GpuLayersMode::Specific`.
    pub gpu_layers: Option<i32>,
    pub split_mode: Option<SplitMode>,
    pub tensor_split: Option<String>,
    pub main_gpu: Option<i32>,
    pub fit: Option<bool>,
    pub lora: Option<PathBuf>,
    /// Adapter path paired with an `f32` (presumably the scale — confirm).
    pub lora_scaled: Option<(PathBuf, f32)>,
    pub rpc: Option<String>,
    pub embedding: Option<bool>,
    pub kv_cache_offload: Option<bool>,
    pub flash_attn: Option<bool>,
    pub jinja: Option<bool>,
    pub chat_template: Option<String>,
    pub chat_template_kwargs: Option<String>,
    pub expert_count: Option<i32>,
    pub gpu_layers_mode: Option<crate::models::GpuLayersMode>,

    // --- Sampling ---
    pub seed: Option<i32>,
    pub temperature: Option<f32>,
    pub top_k: Option<i32>,
    pub top_p: Option<f32>,
    pub min_p: Option<f32>,
    pub typical_p: Option<f32>,
    pub mirostat: Option<Mirostat>,
    pub mirostat_lr: Option<f32>,
    pub mirostat_ent: Option<f32>,
    pub ignore_eos: Option<bool>,
    pub samplers: Option<Samplers>,

    // --- Repetition penalties ---
    pub repeat_penalty: Option<f32>,
    pub repeat_last_n: Option<i32>,
    pub presence_penalty: Option<f32>,
    pub frequency_penalty: Option<f32>,
    pub dry_multiplier: Option<f32>,
    pub dry_base: Option<f32>,
    pub dry_allowed_length: Option<i32>,
    pub dry_penalty_last_n: Option<i32>,

    // --- RoPE ---
    pub rope_scaling: Option<RopeScaling>,
    pub rope_scale: Option<f32>,
    pub rope_freq_base: Option<f32>,
    pub rope_freq_scale: Option<f32>,
    pub rope_yarn_enabled: Option<bool>,

    // --- Prompt cache, web UI and WebSocket server ---
    pub cache_prompt: Option<bool>,
    pub cache_reuse: Option<u32>,
    pub webui: Option<bool>,
    pub ws_server_enabled: Option<bool>,
    pub ws_server_port: Option<u16>,
    pub ws_server_auth_key: Option<String>,
    pub ws_server_tls_enabled: Option<bool>,
    pub ws_server_tls_cert: Option<String>,
    pub ws_server_tls_key: Option<String>,

    // --- Miscellaneous ---
    pub max_tokens: Option<u32>,
    pub cache_type: Option<CacheType>,
    pub llama_cpp_version_cpu: Option<String>,
    pub llama_cpp_version_vulkan: Option<String>,
    pub llama_cpp_version_rocm: Option<String>,
    pub llama_cpp_version_rocm_lemonade: Option<String>,
    pub llama_cpp_version_cuda: Option<String>,
    pub spec_type: Option<String>,
    pub draft_tokens: Option<u32>,
    pub tags: Option<Vec<String>>,
}
284
/// For each listed field, copies the override's value into `$base` when the
/// override holds `Some`; otherwise the base value is left as it is.
/// Intended for `Copy` field types.
macro_rules! apply_scalar {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field {
                $base.$field = value;
            }
        )+
    };
}
293
/// For each listed field, clones the override's value into `$base` when the
/// override holds `Some`. The base field is a plain (non-`Option`) value.
macro_rules! apply_clone {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if let Some(value) = $self.$field.as_ref() {
                $base.$field.clone_from(value);
            }
        )+
    };
}
304
/// For each listed field, replaces the base's `Option` with a clone of the
/// override's when the override holds `Some`; a `None` override leaves the
/// base value in place.
macro_rules! apply_option {
    ($self:ident, $base:ident, $($field:ident),+ $(,)?) => {
        $(
            if $self.$field.is_some() {
                $base.$field.clone_from(&$self.$field);
            }
        )+
    };
}
315
316impl ModelOverride {
317 pub fn from_settings(s: &crate::models::ModelSettings) -> Self {
318 Self {
319 context_length: Some(s.context_length),
320 batch_size: Some(s.batch_size),
321 ubatch_size: Some(s.ubatch_size),
322 cache_type_k: s.cache_type_k,
323 cache_type_v: s.cache_type_v,
324 keep: Some(s.keep),
325 swa_full: Some(s.swa_full),
326 mlock: Some(s.mlock),
327 mmap: Some(s.mmap),
328 numa: Some(s.numa),
329 uniform_cache: Some(s.uniform_cache),
330 system_prompt: Some(s.system_prompt.clone()),
331 system_prompt_preset_name: Some(s.system_prompt_preset_name.clone()),
332 max_concurrent_predictions: s.max_concurrent_predictions,
333 threads: Some(s.threads),
334 threads_batch: Some(s.threads_batch),
335 parallel: Some(s.parallel),
336 gpu_layers: Some(match s.gpu_layers_mode {
337 crate::models::GpuLayersMode::Auto => 0,
338 crate::models::GpuLayersMode::Specific(n) => n as i32,
339 crate::models::GpuLayersMode::All => -1,
340 }),
341 gpu_layers_mode: Some(s.gpu_layers_mode),
342 split_mode: Some(s.split_mode),
343 tensor_split: Some(s.tensor_split.clone()),
344 main_gpu: Some(s.main_gpu),
345 fit: Some(s.fit),
346 lora: s.lora.clone(),
347 lora_scaled: s.lora_scaled.clone(),
348 rpc: Some(s.rpc.clone()),
349 embedding: Some(s.embedding),
350 kv_cache_offload: Some(s.kv_cache_offload),
351 flash_attn: Some(s.flash_attn),
352 jinja: Some(s.jinja),
353 chat_template: s.chat_template.clone(),
354 chat_template_kwargs: s.chat_template_kwargs.clone(),
355 expert_count: Some(s.expert_count),
356 seed: Some(s.seed),
357 temperature: Some(s.temperature),
358 top_k: Some(s.top_k),
359 top_p: Some(s.top_p),
360 min_p: Some(s.min_p),
361 typical_p: Some(s.typical_p),
362 mirostat: Some(s.mirostat),
363 mirostat_lr: Some(s.mirostat_lr),
364 mirostat_ent: Some(s.mirostat_ent),
365 ignore_eos: Some(s.ignore_eos),
366 samplers: Some(s.samplers.clone()),
367 repeat_penalty: Some(s.repeat_penalty),
368 repeat_last_n: Some(s.repeat_last_n),
369 presence_penalty: s.presence_penalty,
370 frequency_penalty: s.frequency_penalty,
371 dry_multiplier: Some(s.dry_multiplier),
372 dry_base: Some(s.dry_base),
373 dry_allowed_length: Some(s.dry_allowed_length),
374 dry_penalty_last_n: Some(s.dry_penalty_last_n),
375 rope_scaling: Some(s.rope_scaling),
376 rope_scale: Some(s.rope_scale),
377 rope_freq_base: Some(s.rope_freq_base),
378 rope_freq_scale: Some(s.rope_freq_scale),
379 rope_yarn_enabled: Some(s.rope_yarn_enabled),
380 cache_prompt: Some(s.cache_prompt),
381 cache_reuse: Some(s.cache_reuse),
382 webui: Some(s.webui),
383 max_tokens: s.max_tokens,
384 cache_type: Some(s.cache_type),
385 llama_cpp_version_cpu: s.llama_cpp_version_cpu.clone(),
386 llama_cpp_version_vulkan: s.llama_cpp_version_vulkan.clone(),
387 llama_cpp_version_rocm: s.llama_cpp_version_rocm.clone(),
388 llama_cpp_version_rocm_lemonade: s.llama_cpp_version_rocm_lemonade.clone(),
389 llama_cpp_version_cuda: s.llama_cpp_version_cuda.clone(),
390 spec_type: Some(s.spec_type.clone()),
391 draft_tokens: Some(s.draft_tokens),
392 tags: Some(s.tags.clone()),
393 ws_server_enabled: Some(s.ws_server_enabled),
394 ws_server_port: Some(s.ws_server_port),
395 ws_server_auth_key: s.ws_server_auth_key.clone(),
396 ws_server_tls_enabled: Some(s.ws_server_tls_enabled),
397 ws_server_tls_cert: s.ws_server_tls_cert.clone(),
398 ws_server_tls_key: s.ws_server_tls_key.clone(),
399 }
400 }
401
402 pub fn apply(&self, base: &mut crate::models::ModelSettings) {
404 apply_scalar!(self, base,
409 context_length, batch_size, ubatch_size, keep, swa_full, mlock, mmap,
410 numa, uniform_cache, kv_cache_offload, threads, threads_batch, parallel,
411 split_mode, main_gpu, fit, embedding, flash_attn, jinja, expert_count,
412 seed, temperature, top_k, top_p, min_p, typical_p,
413 mirostat, mirostat_lr, mirostat_ent, ignore_eos,
414 repeat_penalty, repeat_last_n,
415 dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
416 rope_scaling, rope_scale, rope_freq_base, rope_freq_scale, rope_yarn_enabled,
417 cache_prompt, cache_reuse, webui, cache_type,
418 ws_server_enabled, ws_server_port, ws_server_tls_enabled,
419 draft_tokens, gpu_layers_mode,
420 );
421
422 apply_clone!(self, base,
424 system_prompt, system_prompt_preset_name, tensor_split, rpc,
425 samplers, spec_type, tags,
426 );
427
428 apply_option!(self, base,
430 lora, lora_scaled, chat_template, chat_template_kwargs,
431 llama_cpp_version_cpu, llama_cpp_version_vulkan,
432 llama_cpp_version_rocm, llama_cpp_version_rocm_lemonade,
433 llama_cpp_version_cuda,
434 ws_server_auth_key, ws_server_tls_cert, ws_server_tls_key,
435 );
436
437 base.cache_type_k = self.cache_type_k;
439 base.cache_type_v = self.cache_type_v;
440 base.presence_penalty = self.presence_penalty;
441 base.frequency_penalty = self.frequency_penalty;
442 base.max_tokens = self.max_tokens;
443
444 base.max_concurrent_predictions = self
446 .max_concurrent_predictions
447 .or(base.max_concurrent_predictions);
448
449 if let Some(n) = self.gpu_layers {
452 base.gpu_layers_mode = match n {
453 n if n < 0 => crate::models::GpuLayersMode::All,
454 n => crate::models::GpuLayersMode::Specific(n as u32),
455 };
456 }
457
458 }
474}
475
476pub fn builtin_profiles() -> Vec<Profile> {
478 vec![
479 Profile {
480 name: "Qwen".into(),
481 description: "Optimized for Qwen models (dense)".into(),
482 settings: ModelOverride {
483 context_length: Some(131072),
484 temperature: Some(0.7),
485 top_k: Some(20),
486 top_p: Some(0.95),
487 max_tokens: Some(4096),
488 presence_penalty: Some(0.0),
489 uniform_cache: Some(true),
490 jinja: Some(true),
491 ..Default::default()
492 },
493 },
494 Profile {
495 name: "Qwen-MoE".into(),
496 description: "Optimized for Qwen MoE models (35B-A3B)".into(),
497 settings: ModelOverride {
498 context_length: Some(131072),
499 temperature: Some(0.8),
500 top_k: Some(20),
501 top_p: Some(0.95),
502 max_tokens: Some(4096),
503 presence_penalty: Some(1.5),
504 uniform_cache: Some(true),
505 jinja: Some(true),
506 ..Default::default()
507 },
508 },
509 Profile {
510 name: "Qwen-Coding".into(),
511 description: "Optimized for Qwen models in coding mode".into(),
512 settings: ModelOverride {
513 context_length: Some(131072),
514 temperature: Some(0.6),
515 top_k: Some(20),
516 top_p: Some(0.95),
517 max_tokens: Some(4096),
518 presence_penalty: Some(0.0),
519 uniform_cache: Some(true),
520 jinja: Some(true),
521 ..Default::default()
522 },
523 },
524 Profile {
525 name: "Gemma".into(),
526 description: "Optimized for Gemma 2/4 models".into(),
527 settings: ModelOverride {
528 context_length: Some(131072),
529 min_p: Some(0.1),
530 temperature: Some(1.0),
531 top_k: Some(65),
532 top_p: Some(0.95),
533 max_tokens: Some(4096),
534 uniform_cache: Some(true),
535 jinja: Some(true),
536 ..Default::default()
537 },
538 },
539 Profile {
540 name: "Llama".into(),
541 description: "Optimized for Llama 3.1/3.3 models".into(),
542 settings: ModelOverride {
543 context_length: Some(131072),
544 temperature: Some(0.7),
545 top_p: Some(0.9),
546 repeat_penalty: Some(1.1),
547 max_tokens: Some(4096),
548 uniform_cache: Some(true),
549 jinja: Some(true),
550 ..Default::default()
551 },
552 },
553 Profile {
554 name: "Mistral".into(),
555 description: "Optimized for Mistral 7B/NeMo models".into(),
556 settings: ModelOverride {
557 context_length: Some(131072),
558 temperature: Some(0.7),
559 top_k: Some(50),
560 top_p: Some(0.9),
561 max_tokens: Some(4096),
562 uniform_cache: Some(true),
563 jinja: Some(true),
564 ..Default::default()
565 },
566 },
567 Profile {
568 name: "Phi".into(),
569 description: "Optimized for Phi 3.5 Mini models".into(),
570 settings: ModelOverride {
571 context_length: Some(131072),
572 temperature: Some(0.7),
573 top_k: Some(50),
574 top_p: Some(0.9),
575 repeat_penalty: Some(1.1),
576 max_tokens: Some(4096),
577 uniform_cache: Some(true),
578 ..Default::default()
579 },
580 },
581 ]
582}
583
584#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
585#[serde(default)]
586pub struct DefaultParams {
587 #[serde(default)]
589 pub context_length: u32,
590 #[serde(default)]
591 pub threads: u32,
592 #[serde(default)]
593 pub threads_batch: u32,
594 #[serde(default)]
595 pub batch_size: u32,
596 #[serde(default)]
597 pub ubatch_size: u32,
598 #[serde(default = "default_cache_type_k")]
599 pub cache_type_k: Option<CacheTypeK>,
600 #[serde(default = "default_cache_type_v")]
601 pub cache_type_v: Option<CacheTypeV>,
602 #[serde(default)]
603 pub keep: i32,
604 #[serde(default)]
605 pub swa_full: bool,
606 #[serde(default)]
607 pub mlock: bool,
608 #[serde(default)]
609 pub mmap: bool,
610 #[serde(default)]
611 pub numa: NumMode,
612 #[serde(default)]
613 pub uniform_cache: bool,
614 #[serde(default)]
615 pub kv_cache_offload: bool,
616 #[serde(default)]
617 pub parallel: u32,
618 #[serde(default)]
619 pub max_concurrent_predictions: Option<u32>,
620 #[serde(default)]
621 pub system_prompt: String,
622 #[serde(default = "default_system_prompt_preset_name")]
623 pub system_prompt_preset_name: String,
624 #[serde(default)]
626 pub gpu_layers: i32,
627 #[serde(default = "default_gpu_layers_mode")]
628 pub gpu_layers_mode: crate::models::GpuLayersMode,
629 #[serde(default)]
630 pub split_mode: SplitMode,
631 #[serde(default)]
632 pub tensor_split: String,
633 #[serde(default)]
634 pub main_gpu: i32,
635 #[serde(default)]
636 pub fit: bool,
637 #[serde(default)]
638 pub lora: Option<PathBuf>,
639 #[serde(default)]
640 pub lora_scaled: Option<(PathBuf, f32)>,
641 #[serde(default)]
642 pub rpc: String,
643 #[serde(default)]
644 pub embedding: bool,
645 #[serde(default)]
646 pub flash_attn: bool,
647 #[serde(default)]
648 pub jinja: bool,
649 #[serde(default)]
650 pub chat_template: Option<String>,
651 #[serde(default)]
652 pub chat_template_kwargs: Option<String>,
653 #[serde(default)]
654 pub expert_count: i32,
655
656 #[serde(default)]
658 pub seed: i32,
659 #[serde(default)]
660 pub temperature: f32,
661 #[serde(default)]
662 pub top_k: i32,
663 #[serde(default)]
664 pub top_p: f32,
665 #[serde(default)]
666 pub min_p: f32,
667 #[serde(default)]
668 pub typical_p: f32,
669 #[serde(default)]
670 pub mirostat: Mirostat,
671 #[serde(default)]
672 pub mirostat_lr: f32,
673 #[serde(default)]
674 pub mirostat_ent: f32,
675 #[serde(default)]
676 pub ignore_eos: bool,
677 #[serde(default)]
678 pub samplers: Samplers,
679
680 #[serde(default)]
682 pub repeat_penalty: f32,
683 #[serde(default)]
684 pub repeat_last_n: i32,
685 #[serde(default = "default_presence_penalty")]
686 pub presence_penalty: Option<f32>,
687 #[serde(default = "default_frequency_penalty")]
688 pub frequency_penalty: Option<f32>,
689 #[serde(default)]
690 pub dry_multiplier: f32,
691 #[serde(default)]
692 pub dry_base: f32,
693 #[serde(default)]
694 pub dry_allowed_length: i32,
695 #[serde(default)]
696 pub dry_penalty_last_n: i32,
697
698 #[serde(default)]
700 pub rope_scaling: RopeScaling,
701 #[serde(default)]
702 pub rope_scale: f32,
703 #[serde(default)]
704 pub rope_freq_base: f32,
705 #[serde(default)]
706 pub rope_freq_scale: f32,
707 #[serde(default)]
708 pub rope_yarn_enabled: bool,
709
710 #[serde(default)]
712 pub host: String,
713 #[serde(default)]
714 pub port: u16,
715 #[serde(default)]
716 pub timeout: u32,
717 #[serde(default = "default_cache_prompt")]
718 pub cache_prompt: bool,
719 #[serde(default)]
720 pub cache_reuse: u32,
721 #[serde(default)]
722 pub webui: bool,
723 #[serde(default)]
724 pub ws_server_enabled: bool,
725 #[serde(default = "default_ws_server_port")]
726 pub ws_server_port: u16,
727 #[serde(default)]
728 pub ws_server_auth_key: Option<String>,
729 #[serde(default)]
730 pub ws_server_tls_enabled: bool,
731 #[serde(default)]
732 pub ws_server_tls_cert: Option<String>,
733 #[serde(default)]
734 pub ws_server_tls_key: Option<String>,
735 #[serde(default)]
736 pub router_max_models: u32,
737 #[serde(default)]
738 pub server_mode: crate::models::ServerMode,
739
740 #[serde(default = "default_max_tokens")]
742 pub max_tokens: Option<u32>,
743 #[serde(default)]
744 pub cache_type: CacheType,
745 #[serde(default)]
746 pub backend: Backend,
747 #[serde(default)]
749 pub platform: Option<String>,
750 #[serde(default)]
751 pub llama_cpp_version_cpu: Option<String>,
752 #[serde(default)]
753 pub llama_cpp_version_vulkan: Option<String>,
754 #[serde(default)]
755 pub llama_cpp_version_rocm: Option<String>,
756 #[serde(default)]
757 pub llama_cpp_version_rocm_lemonade: Option<String>,
758 #[serde(default)]
759 pub llama_cpp_version_cuda: Option<String>,
760
761 #[serde(default)]
763 pub api_endpoint_enabled: bool,
764 #[serde(default = "default_api_endpoint_port")]
765 pub api_endpoint_port: u16,
766 #[serde(default)]
767 pub spec_type: String,
768 #[serde(default)]
769 pub draft_tokens: u32,
770 #[serde(default)]
771 pub tags: Vec<String>,
772}
773
/// Serde default for `DefaultParams::api_endpoint_port`.
fn default_api_endpoint_port() -> u16 {
    const DEFAULT_API_ENDPOINT_PORT: u16 = 49222;
    DEFAULT_API_ENDPOINT_PORT
}
777
/// Serde default for `DefaultParams::system_prompt_preset_name`.
fn default_system_prompt_preset_name() -> String {
    String::from("General")
}
781
782fn default_cache_type_k() -> Option<CacheTypeK> {
783 None
784}
785fn default_cache_type_v() -> Option<CacheTypeV> {
786 None
787}
/// Serde default for `DefaultParams::presence_penalty`: unset.
fn default_presence_penalty() -> Option<f32> {
    Option::None
}

/// Serde default for `DefaultParams::frequency_penalty`: unset.
fn default_frequency_penalty() -> Option<f32> {
    Option::None
}

/// Serde default for `DefaultParams::max_tokens`: unset.
fn default_max_tokens() -> Option<u32> {
    Option::None
}
/// Serde default for `DefaultParams::cache_prompt`: enabled.
fn default_cache_prompt() -> bool {
    const DEFAULT_CACHE_PROMPT: bool = true;
    DEFAULT_CACHE_PROMPT
}

/// Serde default for `DefaultParams::ws_server_port`.
fn default_ws_server_port() -> u16 {
    const DEFAULT_WS_SERVER_PORT: u16 = 49223;
    DEFAULT_WS_SERVER_PORT
}
803fn default_gpu_layers_mode() -> crate::models::GpuLayersMode {
804 crate::models::GpuLayersMode::Auto
805}
806
807impl Default for DefaultParams {
808 fn default() -> Self {
809 Self {
810 context_length: 131072,
812 threads: physical_cores(),
813 threads_batch: 8,
814 batch_size: 512,
815 ubatch_size: 512,
816 cache_type_k: None,
817 cache_type_v: None,
818 keep: 0,
819 swa_full: false,
820 mlock: false,
821 mmap: true,
822 numa: NumMode::None,
823 uniform_cache: true,
824 kv_cache_offload: true,
825 parallel: 1,
826 max_concurrent_predictions: None,
827 system_prompt: "You are a helpful assistant.".to_string(),
828 system_prompt_preset_name: "General".to_string(),
829
830 gpu_layers: -1,
832 gpu_layers_mode: crate::models::GpuLayersMode::Auto,
833 split_mode: SplitMode::Layer,
834 tensor_split: String::new(),
835 main_gpu: 0,
836 fit: true,
837 lora: None,
838 lora_scaled: None,
839 rpc: String::new(),
840 embedding: false,
841 flash_attn: true,
842 jinja: true,
843 chat_template: None,
844 chat_template_kwargs: None,
845 expert_count: -1,
846
847 seed: -1,
849 temperature: 0.8,
850 top_k: 40,
851 top_p: 0.95,
852 min_p: 0.0,
853 typical_p: 1.0,
854 mirostat: Mirostat::Off,
855 mirostat_lr: 0.1,
856 mirostat_ent: 5.0,
857 ignore_eos: false,
858 samplers: Samplers::default(),
859
860 repeat_penalty: 1.1,
862 repeat_last_n: 64,
863 presence_penalty: None,
864 frequency_penalty: None,
865 dry_multiplier: 0.0,
866 dry_base: 1.75,
867 dry_allowed_length: 2,
868 dry_penalty_last_n: -1,
869
870 rope_scaling: RopeScaling::None,
872 rope_scale: 1.0,
873 rope_freq_base: 0.0,
874 rope_freq_scale: 1.0,
875 rope_yarn_enabled: false,
876
877 host: "127.0.0.1".to_string(),
879 port: 8080,
880 timeout: 600,
881 cache_prompt: true,
882 cache_reuse: 0,
883 webui: false,
884 ws_server_enabled: false,
885 ws_server_port: 49223,
886 ws_server_auth_key: None,
887 ws_server_tls_enabled: false,
888 ws_server_tls_cert: None,
889 ws_server_tls_key: None,
890 router_max_models: 4,
891 server_mode: crate::models::ServerMode::Normal,
892
893 max_tokens: None,
895 cache_type: CacheType::F16,
896 backend: {
897 use crate::backend::hardware::{GpuVendor, detect_gpu_vendors};
898 let vendors = detect_gpu_vendors();
899 let mut result = Backend::Cpu;
900 for v in &vendors {
901 if matches!(v, GpuVendor::Nvidia) {
902 result = Backend::Cuda;
903 break;
904 }
905 if matches!(v, GpuVendor::Amd) {
906 result = Backend::Rocm;
907 break;
908 }
909 if matches!(v, GpuVendor::Intel) {
910 result = Backend::Vulkan;
911 break;
912 }
913 }
914 result
915 },
916 platform: None,
917 llama_cpp_version_cpu: None,
918 llama_cpp_version_vulkan: None,
919 llama_cpp_version_rocm: None,
920 llama_cpp_version_rocm_lemonade: None,
921 llama_cpp_version_cuda: None,
922 api_endpoint_enabled: false,
923 api_endpoint_port: 49222,
924 spec_type: String::new(),
925 draft_tokens: 0,
926 tags: Vec::new(),
927 }
928 }
929}
930
931impl Default for Config {
932 fn default() -> Self {
933 Self {
934 models_dirs: vec![
935 dirs::data_dir()
936 .unwrap_or_default()
937 .join("llm-manager")
938 .join("models"),
939 ],
940 llama_server: "llama-server".into(),
941 default: DefaultParams::default(),
942 model_overrides: Default::default(),
943 profiles: Default::default(),
944 system_prompt_presets: Default::default(),
945 rpc_workers: Vec::new(),
946 ws_server: WsServer {
947 enabled: false,
948 port: 49223,
949 auth_key: None,
950 host: "0.0.0.0".to_string(),
951 tls_enabled: false,
952 tls_cert: None,
953 tls_key: None,
954 },
955 search_limit: default_search_limit(),
956 }
957 }
958}
959
960impl Config {
961 pub fn config_path() -> PathBuf {
962 config_base_dir()
963 .join("llm-manager")
964 .join("config.yaml")
965 }
966
967 pub fn validate(&self) -> Vec<String> {
969 let mut warnings = Vec::new();
970 let default = &self.default;
971
972 if default.context_length < 512 || default.context_length > 131072 {
974 warnings.push(format!(
975 "context_length {} is outside recommended range 512-131072",
976 default.context_length
977 ));
978 }
979 if default.temperature < 0.0 || default.temperature > 2.0 {
980 warnings.push(format!(
981 "temperature {} is outside recommended range 0.0-2.0",
982 default.temperature
983 ));
984 }
985 if (default.top_p < 0.0 || default.top_p > 1.0) && default.top_p != 0.0 {
986 warnings.push(format!(
987 "top_p {} is outside recommended range 0.0-1.0",
988 default.top_p
989 ));
990 }
991 if (default.repeat_penalty < 0.0 || default.repeat_penalty > 3.0)
992 && default.repeat_penalty != 1.0
993 {
994 warnings.push(format!(
995 "repeat_penalty {} is outside recommended range 0.0-3.0",
996 default.repeat_penalty
997 ));
998 }
999 if default.mirostat_lr < 0.0 || default.mirostat_lr > 1.0 {
1000 warnings.push(format!(
1001 "mirostat_lr {} is outside recommended range 0.0-1.0",
1002 default.mirostat_lr
1003 ));
1004 }
1005 if default.mirostat_ent < 0.0 || default.mirostat_ent > 10.0 {
1006 warnings.push(format!(
1007 "mirostat_ent {} is outside recommended range 0.0-10.0",
1008 default.mirostat_ent
1009 ));
1010 }
1011
1012 if default.timeout < 1 {
1013 warnings.push(format!(
1014 "timeout {} must be at least 1 second",
1015 default.timeout
1016 ));
1017 }
1018
1019 if let Some(lora) = &default.lora
1021 && !lora.exists() {
1022 warnings.push(format!("lora path {} does not exist", lora.display()));
1023 }
1024 if let Some((lora, _)) = &default.lora_scaled
1025 && !lora.exists() {
1026 warnings.push(format!("lora path {} does not exist", lora.display()));
1027 }
1028
1029 for model_name in self.model_overrides.keys() {
1031 if let Some(override_settings) = self.model_overrides.get(model_name.as_str()) {
1032 if let Some(lora) = &override_settings.lora
1033 && !lora.exists() {
1034 warnings.push(format!(
1035 "model '{}' lora path {} does not exist",
1036 model_name,
1037 lora.display()
1038 ));
1039 }
1040 if let Some((lora, _)) = &override_settings.lora_scaled
1041 && !lora.exists() {
1042 warnings.push(format!(
1043 "model '{}' lora path {} does not exist",
1044 model_name,
1045 lora.display()
1046 ));
1047 }
1048 }
1049 }
1050
1051 warnings
1052 }
1053
1054 pub fn resolve_settings(
1056 &self,
1057 model_name: Option<&str>,
1058 profile_name: Option<&str>,
1059 ) -> crate::models::ModelSettings {
1060 let mut settings = crate::models::ModelSettings::from_config(self);
1061
1062 if let Some(name) = model_name
1064 && let Some(override_settings) = self.model_overrides.get(name)
1065 {
1066 override_settings.apply(&mut settings);
1067 }
1068
1069 if let Some(p_name) = profile_name {
1071 if let Some(profile) = self.profiles.get(p_name) {
1072 profile.settings.apply(&mut settings);
1073 } else if let Some(profile) = builtin_profiles().iter().find(|p| p.name == p_name) {
1074 profile.settings.apply(&mut settings);
1075 }
1076 }
1077
1078 settings
1079 }
1080
1081 pub fn get_preset_content(&self, name: &str) -> Option<String> {
1083 self.system_prompt_presets
1084 .get(name)
1085 .map(|p| p.content.clone())
1086 }
1087
1088 fn normalize_config(mut config: Config) -> Config {
1089 for path in &mut config.models_dirs {
1091 let path_str = path.to_string_lossy();
1092 if let Some(stripped) = path_str.strip_prefix("~/") {
1093 let home = dirs::home_dir().unwrap_or_default();
1094 *path = home.join(stripped);
1095 } else if !path.is_absolute() {
1096 let home = dirs::home_dir().unwrap_or_default();
1097 *path = home.join(path_str.as_ref());
1098 }
1099 }
1100
1101 for p in builtin_profiles() {
1103 if config.profiles.get(&p.name).is_none() {
1104 config.profiles.insert_builtin(p);
1105 }
1106 }
1107
1108 for p in builtin_system_prompt_presets() {
1110 if config.system_prompt_presets.get(&p.name).is_none() {
1111 config.system_prompt_presets.insert_builtin(p);
1112 }
1113 }
1114 config
1115 }
1116
1117 fn load_impl(path: &PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1118 let content = std::fs::read_to_string(path)?;
1119 let config: Config = serde_yaml::from_str(&content)
1120 .map_err(|e| format!("Failed to parse config file {}: {}", path.display(), e))?;
1121 let config = Self::normalize_config(config);
1122 let config = config.auto_detect_platform();
1123 let warnings = config.validate();
1124 if !warnings.is_empty() {
1125 eprintln!("Config validation warnings:");
1126 for warning in &warnings {
1127 eprintln!(" - {}", warning);
1128 }
1129 }
1130 Ok(config)
1131 }
1132
1133 pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
1134 let path = Self::config_path();
1135 if path.exists() {
1136 Self::load_impl(&path)
1137 } else {
1138 let mut config = Config::default();
1139 config.save()?;
1140 Ok(config)
1141 }
1142 }
1143
1144 pub fn load_from(path: PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
1145 if path.exists() {
1146 Self::load_impl(&path)
1147 } else {
1148 Err(format!("Config file not found: {}", path.display()).into())
1149 }
1150 }
1151
1152 fn auto_detect_platform(mut self) -> Self {
1154 if self.default.platform.is_none() {
1155 self.default.platform =
1156 Some(
1157 crate::backend::hardware::platform_name(
1158 crate::backend::hardware::detect_platform(),
1159 )
1160 .to_string(),
1161 );
1162 }
1163 self
1164 }
1165
1166 pub fn save(&mut self) -> Result<(), Box<dyn std::error::Error>> {
1167 let path = Self::config_path();
1168 if let Some(parent) = path.parent() {
1169 std::fs::create_dir_all(parent)?;
1170 }
1171 let content = serde_yaml::to_string(self)?;
1172 std::fs::write(&path, content)?;
1173 let entries: Vec<(String, ModelOverride)> = self
1175 .model_overrides
1176 .keys()
1177 .iter()
1178 .filter_map(|k| self.model_overrides.get(k).map(|v| (k.clone(), v.clone())))
1179 .collect();
1180 for (name, cfg) in entries {
1181 self.model_overrides.save(&name, &cfg);
1182 }
1183 for profile in self.profiles.user_profiles() {
1185 self.profiles.save(&profile);
1186 }
1187 for preset in self.system_prompt_presets.user_presets() {
1189 self.system_prompt_presets.save(&preset);
1190 }
1191 Ok(())
1192 }
1193
1194 pub fn merged_profiles(&self) -> Vec<Profile> {
1195 self.profiles.all()
1196 }
1197
1198 pub fn merged_presets(&self) -> Vec<SystemPromptPreset> {
1199 self.system_prompt_presets.all()
1200 }
1201}
1202
/// Severity of a `LogEntry`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    Info,
    Warning,
    Error,
}

impl LogLevel {
    /// Upper-case display label for the level.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Info => "INFO",
            Self::Warning => "WARNING",
            Self::Error => "ERROR",
        }
    }
}
1219
1220#[derive(Debug, Clone)]
1221pub struct LogEntry {
1222 pub timestamp: String,
1223 pub level: LogLevel,
1224 pub message: String,
1225}
1226
1227impl LogEntry {
1228 pub fn new(message: impl Into<String>, level: LogLevel) -> Self {
1229 let timestamp = Local::now().format("%H:%M:%S").to_string();
1230 let message = sanitize_log(&message.into());
1231 Self {
1232 timestamp,
1233 level,
1234 message,
1235 }
1236 }
1237}
1238
/// Makes a log message safe to display.
///
/// The message is cut to its first 2000 characters, control characters other
/// than newline and tab are dropped, each tab becomes a single space, and
/// trailing whitespace is trimmed. When the input was longer than the limit,
/// `"... (truncated)"` is appended.
fn sanitize_log(input: &str) -> String {
    const MAX_CHARS: usize = 2000;

    // Truncation is decided on the raw character count, before filtering.
    let was_truncated = input.chars().nth(MAX_CHARS).is_some();

    let cleaned: String = input
        .chars()
        .take(MAX_CHARS)
        .filter(|&c| !c.is_control() || c == '\n' || c == '\t')
        .map(|c| if c == '\t' { ' ' } else { c })
        .collect();

    let mut sanitized = String::from(cleaned.trim_end());
    if was_truncated {
        sanitized.push_str("... (truncated)");
    }
    sanitized
}