1use anyhow::Result;
4use std::env;
5use std::net::SocketAddr;
6use std::path::PathBuf;
7use sysinfo::System;
8use tracing::{debug, info, warn};
9
10#[cfg(all(feature = "nvidia", any(target_os = "windows", target_os = "linux")))]
12use nvml_wrapper::Nvml;
13
#[allow(dead_code)]
#[derive(Debug, Clone)]
/// Runtime configuration for the service, assembled from environment
/// variables (with auto-detected fallbacks) by [`Config::from_env`].
pub struct Config {
    /// Filesystem path to the model file; may be empty when no model was resolved.
    pub model_path: String,
    /// Path to the llama-server executable; may be empty when auto-detection failed.
    pub llama_bin: String,
    /// Host the llama.cpp backend listens on.
    pub llama_host: String,
    /// Port the llama.cpp backend listens on.
    pub llama_port: u16,
    /// Context window size in tokens.
    pub ctx_size: u32,
    /// Batch size handed to the inference backend.
    pub batch_size: u32,
    /// Number of CPU threads used for inference.
    pub threads: u32,
    /// Number of model layers offloaded to the GPU (0 = CPU only).
    pub gpu_layers: u32,
    /// Seconds to wait for the backend to become healthy.
    pub health_timeout_seconds: u64,
    /// Grace period (seconds) during hot-swap of the backend process.
    pub hot_swap_grace_seconds: u64,
    /// Maximum number of simultaneous streaming responses.
    pub max_concurrent_streams: u32,
    /// Port the Prometheus metrics endpoint binds to.
    pub prometheus_port: u16,
    /// Host the public API server binds to.
    pub api_host: String,
    /// Port the public API server binds to.
    pub api_port: u16,
    /// Rate limit for incoming requests.
    pub requests_per_second: u32,
    /// Timeout (seconds) for non-streaming generation requests.
    pub generate_timeout_seconds: u64,
    /// Timeout (seconds) for streaming requests.
    pub stream_timeout_seconds: u64,
    /// Timeout (seconds) for health-check probes.
    pub health_check_timeout_seconds: u64,
    /// Capacity of the request queue.
    pub queue_size: usize,
    /// Seconds a request may wait in the queue before timing out.
    pub queue_timeout_seconds: u64,
    /// Base URL of the llama.cpp backend, derived from host + port.
    pub backend_url: String,
    /// OpenRouter API key; empty string when unset.
    pub openrouter_api_key: String,
}
40
41impl Config {
    /// Builds a `Config` from environment variables.
    ///
    /// A `.env` file is loaded from the FIRST location that succeeds, in
    /// priority order: executable directory → (macOS) app-bundle
    /// `Contents/Resources` → (macOS) user data dir `Aud.io/.env` →
    /// project root (only when running from `target/release`) → current
    /// working directory. Later locations are skipped once one loads.
    ///
    /// The values `THREADS`, `GPU_LAYERS`, `CTX_SIZE` and `BATCH_SIZE`
    /// default to `"auto"`, which triggers hardware-based detection.
    ///
    /// # Errors
    /// Returns an error when a numeric environment variable is set but not
    /// parsable (the `.parse()?` calls below).
    pub fn from_env() -> Result<Self> {
        let mut env_loaded = false;

        if let Ok(exe_path) = std::env::current_exe() {
            if let Some(exe_dir) = exe_path.parent() {
                // 1) `.env` sitting next to the executable.
                let env_path = exe_dir.join(".env");
                if env_path.exists() {
                    match dotenvy::from_path(&env_path) {
                        Ok(_) => {
                            info!("Loaded .env from executable directory: {:?}", env_path);
                            env_loaded = true;
                        }
                        Err(e) => {
                            warn!("Failed to load .env from {:?}: {}", env_path, e);
                        }
                    }
                }

                // 2) macOS app bundle: the exe lives in Contents/MacOS, so the
                //    parent is Contents and Resources/.env sits beside it.
                #[cfg(target_os = "macos")]
                if !env_loaded {
                    if let Some(contents_dir) = exe_dir.parent() {
                        let bundle_env = contents_dir.join("Resources").join(".env");
                        if bundle_env.exists() {
                            match dotenvy::from_path(&bundle_env) {
                                Ok(_) => {
                                    info!("Loaded .env from macOS bundle Resources: {:?}", bundle_env);
                                    env_loaded = true;
                                }
                                Err(e) => {
                                    warn!("Failed to load .env from bundle Resources {:?}: {}", bundle_env, e);
                                }
                            }
                        }
                    }
                }

                // 3) macOS per-user data directory (e.g. ~/Library/Application Support/Aud.io).
                #[cfg(target_os = "macos")]
                if !env_loaded {
                    if let Some(app_support) = dirs::data_dir() {
                        let user_env = app_support.join("Aud.io").join(".env");
                        if user_env.exists() {
                            match dotenvy::from_path(&user_env) {
                                Ok(_) => {
                                    info!("Loaded .env from user data directory: {:?}", user_env);
                                    env_loaded = true;
                                }
                                Err(e) => {
                                    warn!("Failed to load .env from user data dir {:?}: {}", user_env, e);
                                }
                            }
                        }
                    }
                }

                // 4) Development convenience: when running from target/release,
                //    walk two levels up to the project root and try .env there.
                if !env_loaded {
                    let project_root = if exe_dir.ends_with("target/release")
                        || exe_dir.ends_with("target\\release")
                    {
                        exe_dir.parent().and_then(|p| p.parent())
                    } else {
                        None
                    };

                    if let Some(root) = project_root {
                        let root_env = root.join(".env");
                        if root_env.exists() {
                            match dotenvy::from_path(&root_env) {
                                Ok(_) => {
                                    info!("Loaded .env from project root: {:?}", root_env);
                                    env_loaded = true;
                                }
                                Err(e) => {
                                    warn!(
                                        "Failed to load .env from project root {:?}: {}",
                                        root_env, e
                                    );
                                }
                            }
                        }
                    }
                }
            }
        }

        // 5) Last resort: dotenvy's default search from the current directory.
        //    Failure here is non-fatal — system env vars still apply.
        if !env_loaded {
            if let Err(e) = dotenvy::dotenv() {
                warn!("Failed to load .env from current directory: {}. Using system environment variables.", e);
            } else {
                info!("Loaded environment variables from .env file in current directory");
            }
        }

        let llama_bin = Self::get_llama_binary_path()?;
        info!("Using llama binary: {}", llama_bin);

        let model_path = Self::get_model_path_with_fallback()?;

        // For each tunable: "auto" (the default) triggers detection; any other
        // value is parsed, falling back to a hard-coded default on parse failure.
        let threads = if env::var("THREADS").unwrap_or_else(|_| "auto".into()) == "auto" {
            Self::auto_detect_threads()
        } else {
            env::var("THREADS")
                .unwrap_or_else(|_| "6".into())
                .parse()
                .unwrap_or(6)
        };

        let gpu_layers = if env::var("GPU_LAYERS").unwrap_or_else(|_| "auto".into()) == "auto" {
            Self::auto_detect_gpu_layers()
        } else {
            env::var("GPU_LAYERS")
                .unwrap_or_else(|_| "20".into())
                .parse()
                .unwrap_or(20)
        };

        let ctx_size = if env::var("CTX_SIZE").unwrap_or_else(|_| "auto".into()) == "auto" {
            Self::auto_detect_ctx_size(&model_path)
        } else {
            env::var("CTX_SIZE")
                .unwrap_or_else(|_| "8192".into())
                .parse()
                .unwrap_or(8192)
        };

        // Batch size detection depends on the (possibly detected) GPU layer
        // count and context size, so it runs last.
        let batch_size = if env::var("BATCH_SIZE").unwrap_or_else(|_| "auto".into()) == "auto" {
            Self::auto_detect_batch_size(gpu_layers, ctx_size)
        } else {
            env::var("BATCH_SIZE")
                .unwrap_or_else(|_| "256".into())
                .parse()
                .unwrap_or(256)
        };

        let llama_host = env::var("LLAMA_HOST").unwrap_or_else(|_| "127.0.0.1".into());
        let llama_port = env::var("LLAMA_PORT")
            .unwrap_or_else(|_| "8081".into())
            .parse()?;
        let backend_url = format!("http://{}:{}", llama_host, llama_port);

        let openrouter_api_key = env::var("OPENROUTER_API_KEY").unwrap_or_default();

        info!(
            "Resource Configuration: {} GPU layers, {} threads, batch size: {}, context: {}",
            gpu_layers, threads, batch_size, ctx_size
        );

        Ok(Self {
            model_path,
            llama_bin,
            // NOTE(review): clone is redundant — format! above only borrows
            // llama_host, so it could be moved here. Harmless as-is.
            llama_host: llama_host.clone(),
            llama_port,
            ctx_size,
            batch_size,
            threads,
            gpu_layers,
            health_timeout_seconds: env::var("HEALTH_TIMEOUT_SECONDS")
                .unwrap_or_else(|_| "60".into())
                .parse()?,
            hot_swap_grace_seconds: env::var("HOT_SWAP_GRACE_SECONDS")
                .unwrap_or_else(|_| "25".into())
                .parse()?,
            max_concurrent_streams: env::var("MAX_CONCURRENT_STREAMS")
                .unwrap_or_else(|_| "4".into())
                .parse()?,
            prometheus_port: env::var("PROMETHEUS_PORT")
                .unwrap_or_else(|_| "9000".into())
                .parse()?,
            api_host: env::var("API_HOST").unwrap_or_else(|_| "127.0.0.1".into()),
            api_port: env::var("API_PORT")
                .unwrap_or_else(|_| "9999".into())
                .parse()?,
            requests_per_second: env::var("REQUESTS_PER_SECOND")
                .unwrap_or_else(|_| "24".into())
                .parse()?,
            generate_timeout_seconds: env::var("GENERATE_TIMEOUT_SECONDS")
                .unwrap_or_else(|_| "300".into())
                .parse()?,
            stream_timeout_seconds: env::var("STREAM_TIMEOUT_SECONDS")
                .unwrap_or_else(|_| "600".into())
                .parse()?,
            health_check_timeout_seconds: env::var("HEALTH_CHECK_TIMEOUT_SECONDS")
                .unwrap_or_else(|_| "90".into())
                .parse()?,
            queue_size: env::var("QUEUE_SIZE")
                .unwrap_or_else(|_| "100".into())
                .parse()?,
            queue_timeout_seconds: env::var("QUEUE_TIMEOUT_SECONDS")
                .unwrap_or_else(|_| "30".into())
                .parse()?,
            backend_url,
            openrouter_api_key,
        })
    }
256
257 fn get_model_path_with_fallback() -> Result<String> {
258 if let Ok(model_path) = env::var("MODEL_PATH") {
260 if std::path::Path::new(&model_path).exists() {
262 info!("Using model from MODEL_PATH: {}", model_path);
263 return Ok(model_path);
264 } else {
265 warn!("MODEL_PATH set but file doesn't exist: {}", model_path);
266 }
267 }
268
269 let exe_dir = std::env::current_exe()
271 .ok()
272 .and_then(|exe| exe.parent().map(|p| p.to_path_buf()))
273 .unwrap_or_else(|| std::env::current_dir().unwrap_or_default());
274
275 let possible_model_locations = vec![
277 exe_dir.join("resources/models/default.gguf"),
279 exe_dir.join("resources/models/model.gguf"),
280 exe_dir.join("models/default.gguf"),
281 exe_dir.join("models/model.gguf"),
282 exe_dir.join("default.gguf"),
283 exe_dir.join("resources/models/default.onnx"),
285 exe_dir.join("resources/models/model.onnx"),
286 exe_dir.join("resources/models/default.trt"),
288 exe_dir.join("resources/models/model.engine"),
289 exe_dir.join("resources/models/default.safetensors"),
291 exe_dir.join("resources/models/model.safetensors"),
292 exe_dir.join("resources/models/default.ggml"),
294 exe_dir.join("resources/models/model.bin"),
295 ];
296
297 for model_path in possible_model_locations {
298 if model_path.exists() {
299 info!("Using embedded model: {}", model_path.display());
300 return Ok(model_path.to_string_lossy().to_string());
301 }
302 }
303
304 if let Ok(entries) = std::fs::read_dir(exe_dir.join("resources/models")) {
306 for entry in entries.flatten() {
307 if let Some(ext) = entry.path().extension() {
308 let ext_str = ext.to_str().unwrap_or("").to_lowercase();
309 if matches!(
311 ext_str.as_str(),
312 "gguf"
313 | "ggml"
314 | "onnx"
315 | "trt"
316 | "engine"
317 | "plan"
318 | "safetensors"
319 | "mlmodel"
320 ) {
321 info!("Using found model: {}", entry.path().display());
322 return Ok(entry.path().to_string_lossy().to_string());
323 }
324 }
325 }
326 }
327
328 Ok("".to_string())
331 }
332
    /// Resolves the path to the llama-server binary.
    ///
    /// Priority: `LLAMA_BIN` env var (if the file exists), then a search of
    /// `Resources/bin/<os>/<subdir>/<binary>` and `Resources/bin/<os>/<binary>`
    /// (both folder capitalisations) under the executable directory, its two
    /// ancestors, the current working directory, and — in debug builds — the
    /// crate root. Returns an empty string when nothing is found, after
    /// logging detailed guidance; callers must handle the empty case.
    fn get_llama_binary_path() -> Result<String> {
        if let Ok(llama_bin) = env::var("LLAMA_BIN") {
            if std::path::Path::new(&llama_bin).exists() {
                info!("Using llama binary from LLAMA_BIN env: {}", llama_bin);
                return Ok(llama_bin);
            } else {
                warn!(
                    "LLAMA_BIN set but file doesn't exist: {}, falling back to auto-detection",
                    llama_bin
                );
            }
        }

        let (os_folder, binary_name) = Self::get_platform_binary_info();
        info!(
            "Auto-detecting llama binary for OS: {} (binary: {})",
            os_folder, binary_name
        );

        let exe_dir = std::env::current_exe()
            .ok()
            .and_then(|exe| exe.parent().map(|p| p.to_path_buf()));

        let cwd = std::env::current_dir().ok();

        // Candidate base directories, highest priority first.
        let mut search_dirs: Vec<PathBuf> = Vec::new();

        // Executable dir plus two ancestors (covers bundle-style layouts).
        if let Some(ref exe) = exe_dir {
            search_dirs.push(exe.clone());
            if let Some(parent) = exe.parent() {
                search_dirs.push(parent.to_path_buf());
                if let Some(grandparent) = parent.parent() {
                    search_dirs.push(grandparent.to_path_buf());
                }
            }
        }

        if let Some(ref cwd_path) = cwd {
            search_dirs.push(cwd_path.clone());
        }

        // Debug builds also search the crate source tree.
        #[cfg(debug_assertions)]
        {
            let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
            search_dirs.push(crate_dir);
        }

        // Accept both capitalisations of the resources folder.
        let resource_folder_names = ["Resources", "resources"];
        for base_dir in &search_dirs {
            for resource_folder in &resource_folder_names {
                let bin_dir = base_dir.join(resource_folder).join("bin").join(os_folder);

                if bin_dir.exists() {
                    if let Ok(entries) = std::fs::read_dir(&bin_dir) {
                        // Sort so the pick is deterministic when several
                        // variant subdirectories exist.
                        let mut dir_entries: Vec<_> = entries.flatten().collect();
                        dir_entries.sort_by_key(|e| e.file_name());

                        for entry in dir_entries {
                            let entry_path = entry.path();
                            if !entry_path.is_dir() {
                                continue;
                            }

                            // On macOS, skip subdirectories built for the
                            // other CPU architecture (matched by dir name).
                            #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
                            {
                                let dir_name = entry_path
                                    .file_name()
                                    .and_then(|n| n.to_str())
                                    .unwrap_or("");
                                if dir_name.contains("x64") || dir_name.contains("x86_64") {
                                    debug!("Skipping Intel subdir on Apple Silicon: {}", dir_name);
                                    continue;
                                }
                            }
                            #[cfg(all(target_os = "macos", target_arch = "x86_64"))]
                            {
                                let dir_name = entry_path
                                    .file_name()
                                    .and_then(|n| n.to_str())
                                    .unwrap_or("");
                                if dir_name.contains("arm64") || dir_name.contains("aarch64") {
                                    debug!("Skipping ARM subdir on Intel Mac: {}", dir_name);
                                    continue;
                                }
                            }

                            let potential_binary = entry_path.join(binary_name);
                            if potential_binary.exists() {
                                info!("Found llama binary at: {}", potential_binary.display());
                                return Ok(potential_binary.to_string_lossy().to_string());
                            }
                        }
                    }

                    // Also accept a binary placed directly in the OS folder.
                    let direct_binary = bin_dir.join(binary_name);
                    if direct_binary.exists() {
                        info!("Found llama binary at: {}", direct_binary.display());
                        return Ok(direct_binary.to_string_lossy().to_string());
                    }
                }
            }
        }

        let arch = Self::get_arch_hint();
        warn!(
            "Llama binary not found. Searched in Resources/bin/{os_folder}/ for '{binary_name}'.\n\
             Please either:\n\
             1. Set LLAMA_BIN environment variable to the full path\n\
             2. Place the binary in Resources/bin/{os_folder}/<subfolder>/\n\
             \n\
             Expected binary name: {binary_name}\n\
             OS detected: {os_folder}\n\
             Architecture: {arch}\n\
             Searched directories: {:?}",
            search_dirs
                .iter()
                .map(|p| p.display().to_string())
                .collect::<Vec<_>>()
        );

        // NOTE(review): empty-string sentinel instead of an Err — callers
        // apparently tolerate it; consider a typed error in a future change.
        Ok(String::new())
    }
482
    /// Returns the (resource-folder name, binary filename) pair for the
    /// current compile target, matching the on-disk layout searched by
    /// `get_llama_binary_path` (`Resources/bin/<os_folder>/`).
    fn get_platform_binary_info() -> (&'static str, &'static str) {
        #[cfg(target_os = "windows")]
        {
            ("Windows", "llama-server.exe")
        }

        // Both Mac architectures share folder and binary name; the
        // arch-specific subfolder filtering happens at search time.
        #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
        {
            ("MacOS", "llama-server")
        }

        #[cfg(all(target_os = "macos", target_arch = "x86_64"))]
        {
            ("MacOS", "llama-server")
        }

        #[cfg(target_os = "linux")]
        {
            ("Linux", "llama-server")
        }

        // Fail the build on unsupported targets rather than at runtime.
        #[cfg(not(any(target_os = "windows", target_os = "macos", target_os = "linux")))]
        {
            compile_error!(
                "Unsupported operating system. Only Windows, macOS, and Linux are supported."
            );
        }
    }
516
517 fn get_arch_hint() -> &'static str {
519 #[cfg(target_arch = "x86_64")]
520 {
521 "x64"
522 }
523 #[cfg(target_arch = "aarch64")]
524 {
525 "arm64"
526 }
527 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
528 {
529 "unknown"
530 }
531 }
532
533 fn auto_detect_threads() -> u32 {
534 let threads = num_cpus::get() as u32;
535 info!("Auto-detected {} CPU cores for inference", threads);
536 threads
537 }
538
    /// Auto-detects how many model layers to offload to the GPU.
    ///
    /// Exactly one of three cfg-gated paths compiles in:
    /// - Windows/Linux with the `nvidia` feature: query VRAM via NVML.
    /// - Windows/Linux without it: shell out to `nvidia-smi` (5 s timeout).
    /// - macOS: fixed table from unified memory (Apple Silicon) or 0 (Intel).
    ///
    /// Returns 0 (CPU-only) whenever no usable GPU is detected.
    fn auto_detect_gpu_layers() -> u32 {
        // NVML path: direct driver query, first GPU only.
        #[cfg(all(feature = "nvidia", any(target_os = "windows", target_os = "linux")))]
        {
            if let Ok(nvml) = Nvml::init() {
                if let Ok(device_count) = nvml.device_count() {
                    if device_count > 0 {
                        if let Ok(first_gpu) = nvml.device_by_index(0) {
                            if let Ok(memory) = first_gpu.memory_info() {
                                // memory.total is in bytes; convert to GiB.
                                let vram_gb = memory.total / 1024 / 1024 / 1024;
                                let layers = match vram_gb {
                                    0..=4 => 12,
                                    5..=8 => 20,
                                    9..=12 => 32,
                                    13..=16 => 50,
                                    _ => 50,
                                };
                                info!(
                                    "Auto‑detected NVIDIA GPU layers: {} ({} GB VRAM)",
                                    layers, vram_gb
                                );
                                return layers;
                            }
                        }
                    }
                }
            }
            // Any failure along the NVML chain falls through to CPU-only.
            info!("No NVIDIA GPU detected, using CPU-optimized defaults (0 GPU layers)");
            0
        }

        // nvidia-smi path: used when the `nvidia` feature is off. Both cfg
        // attributes must hold (not-NVML AND windows/linux).
        #[cfg(not(all(feature = "nvidia", any(target_os = "windows", target_os = "linux"))))]
        #[cfg(any(target_os = "windows", target_os = "linux"))]
        {
            use std::process::{Command, Stdio};

            let child = Command::new("nvidia-smi")
                .args(["--query-gpu=memory.total", "--format=csv,noheader,nounits"])
                .stdout(Stdio::piped())
                .stderr(Stdio::null())
                .spawn();

            match child {
                Ok(mut process) => {
                    // Poll with try_wait so a hung nvidia-smi can't block
                    // startup; kill it after 5 seconds.
                    let start = std::time::Instant::now();
                    loop {
                        match process.try_wait() {
                            Ok(Some(status)) => {
                                if status.success() {
                                    if let Ok(output) = process.wait_with_output() {
                                        let stdout = String::from_utf8_lossy(&output.stdout);
                                        // First line = total VRAM of GPU 0 in MiB.
                                        if let Some(vram_mb_str) = stdout.lines().next() {
                                            if let Ok(vram_mb) = vram_mb_str.trim().parse::<u64>() {
                                                let vram_gb = vram_mb / 1024;
                                                let layers = match vram_gb {
                                                    0..=4 => 12,
                                                    5..=8 => 20,
                                                    9..=12 => 32,
                                                    13..=16 => 50,
                                                    _ => 50,
                                                };
                                                info!(
                                                    "Auto‑detected NVIDIA GPU layers via nvidia-smi: {} ({} GB VRAM)",
                                                    layers, vram_gb
                                                );
                                                return layers;
                                            }
                                        }
                                    }
                                }
                                info!("nvidia-smi returned but could not parse VRAM, using CPU defaults (0 GPU layers)");
                                return 0;
                            }
                            Ok(None) => {
                                if start.elapsed() > std::time::Duration::from_secs(5) {
                                    let _ = process.kill();
                                    let _ = process.wait();
                                    info!("nvidia-smi timed out, using CPU defaults (0 GPU layers)");
                                    return 0;
                                }
                                std::thread::sleep(std::time::Duration::from_millis(50));
                            }
                            Err(_) => {
                                return 0;
                            }
                        }
                    }
                }
                Err(_) => {
                    info!("No NVIDIA GPU detected (nvidia-smi not available), using CPU defaults (0 GPU layers)");
                    0
                }
            }
        }

        // Apple Silicon: size Metal offload from unified memory.
        #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
        {
            let total_mem_gb = {
                let mut sys = System::new_all();
                sys.refresh_memory();
                // NOTE(review): assumes sysinfo reports bytes — consistent
                // with the GB math elsewhere in this file.
                sys.total_memory() / 1024 / 1024 / 1024
            };

            let layers = match total_mem_gb {
                0..=8 => 24,
                9..=16 => 32,
                17..=32 => 40,
                33..=64 => 48,
                _ => 56,
            };
            info!(
                "Apple Silicon detected ({} GB unified memory), using Metal GPU layers: {}",
                total_mem_gb, layers
            );
            layers
        }

        // Intel Macs get no GPU offload.
        #[cfg(all(target_os = "macos", target_arch = "x86_64"))]
        {
            info!("Intel Mac detected, using CPU-only mode (0 GPU layers)");
            0
        }
    }
672
673 fn auto_detect_ctx_size(model_path: &str) -> u32 {
674 let inferred = Self::read_ctx_size_from_model_path(model_path).unwrap_or_else(|| {
675 info!("Falling back to default context size (8192)");
676 8192
677 });
678 let adjusted = Self::adjust_ctx_size_for_system(inferred);
679 info!("Final context size: {} (inferred: {})", adjusted, inferred);
680 adjusted
681 }
682
683 fn read_ctx_size_from_model_path(model_path: &str) -> Option<u32> {
684 let path_lower = model_path.to_lowercase();
686
687 if path_lower.contains("32k") {
688 Some(32768)
689 } else if path_lower.contains("16k") {
690 Some(16384)
691 } else if path_lower.contains("8k") {
692 Some(8192)
693 } else if path_lower.contains("4k") {
694 Some(4096)
695 } else if path_lower.contains("2k") {
696 Some(2048)
697 } else if path_lower.contains("7b")
698 || path_lower.contains("8b")
699 || path_lower.contains("13b")
700 {
701 Some(4096)
702 } else if path_lower.contains("34b") || path_lower.contains("70b") {
703 Some(8192)
704 } else {
705 Some(8192)
707 }
708 }
709
710 fn adjust_ctx_size_for_system(inferred_ctx: u32) -> u32 {
711 let mut system = System::new_all();
712 system.refresh_memory();
713
714 let available_ram_gb = system.available_memory() / 1024 / 1024 / 1024;
715
716 let required_ram_gb = (inferred_ctx as f32 / 4096.0) * 1.5;
717 if available_ram_gb < required_ram_gb as u64 {
718 let adjusted = (available_ram_gb as f32 * 4096.0 / 1.5) as u32;
719 let safe_ctx = adjusted.min(inferred_ctx).max(2048);
720 warn!(
721 "Reducing context size from {} → {} due to limited RAM ({}GB available)",
722 inferred_ctx, safe_ctx, available_ram_gb
723 );
724 safe_ctx
725 } else {
726 inferred_ctx
727 }
728 }
729
730 fn auto_detect_batch_size(gpu_layers: u32, ctx_size: u32) -> u32 {
731 let mut system = System::new_all();
732 system.refresh_memory();
733
734 let available_mb = system.available_memory() / 1024;
735 let has_gpu = gpu_layers > 0;
736 let memory_per_batch = Self::estimate_memory_per_batch(ctx_size, has_gpu);
737 let safe_available_mb = (available_mb as f32 * 0.6) as u32;
738 let max_batch = (safe_available_mb as f32 / memory_per_batch).max(1.0) as u32;
739
740 let optimal = Self::apply_batch_limits(max_batch, ctx_size, has_gpu);
741 info!(
742 "Auto batch size: {} (ctx: {}, GPU: {}, est mem: {:.1}MB/batch)",
743 optimal, ctx_size, has_gpu, memory_per_batch
744 );
745 optimal
746 }
747
748 fn estimate_memory_per_batch(ctx_size: u32, has_gpu: bool) -> f32 {
749 if has_gpu {
750 (ctx_size as f32 / 1024.0) * 0.5
751 } else {
752 (ctx_size as f32 / 1024.0) * 1.2
753 }
754 }
755
756 fn apply_batch_limits(batch_size: u32, ctx_size: u32, _has_gpu: bool) -> u32 {
757 let limited = batch_size.clamp(16, 1024);
758 match ctx_size {
759 0..=2048 => limited.min(512),
760 2049..=4096 => limited.min(512),
761 4097..=8192 => limited.min(256),
762 8193..=16384 => limited.min(128),
763 16385..=32768 => limited.min(64),
764 _ => limited.min(32),
765 }
766 }
767
    /// Logs the effective configuration at info level.
    /// The OpenRouter API key is deliberately not logged.
    pub fn print_config(&self) {
        info!("Current Configuration:");
        info!("- Model Path: {}", self.model_path);
        info!("- Llama Binary: {}", self.llama_bin);
        info!("- Context Size: {}", self.ctx_size);
        info!("- Batch Size: {}", self.batch_size);
        info!("- Threads: {}", self.threads);
        info!("- GPU Layers: {}", self.gpu_layers);
        info!("- Max Streams: {}", self.max_concurrent_streams);
        info!("- API: {}:{}", self.api_host, self.api_port);
        info!("- Backend: {}:{}", self.llama_host, self.llama_port);
        info!("- Queue Size: {}", self.queue_size);
        info!("- Queue Timeout: {}s", self.queue_timeout_seconds);
        info!("- Backend URL: {}", self.backend_url);
    }
783
784 pub fn api_addr(&self) -> SocketAddr {
785 format!("{}:{}", self.api_host, self.api_port)
786 .parse()
787 .unwrap()
788 }
789}
790
#[cfg(test)]
mod tests {
    //! Unit tests for `Config`: field sanity checks on a fixed fixture plus
    //! direct tests of the pure auto-detection helpers.
    use super::*;

    /// Fixed, fully-populated `Config` fixture used by all tests below.
    fn create_test_config() -> Config {
        Config {
            model_path: "/test/model.gguf".to_string(),
            llama_bin: "/test/llama-server".to_string(),
            llama_host: "127.0.0.1".to_string(),
            llama_port: 8001,
            ctx_size: 8192,
            batch_size: 128,
            threads: 6,
            gpu_layers: 20,
            health_timeout_seconds: 600,
            hot_swap_grace_seconds: 25,
            max_concurrent_streams: 2,
            prometheus_port: 9000,
            api_host: "127.0.0.1".to_string(),
            api_port: 9999,
            requests_per_second: 24,
            generate_timeout_seconds: 300,
            stream_timeout_seconds: 600,
            health_check_timeout_seconds: 900,
            queue_size: 1000,
            queue_timeout_seconds: 300,
            backend_url: "http://127.0.0.1:8001".to_string(),
            openrouter_api_key: "test-api-key".to_string(),
        }
    }

    // --- construction and cloning ---

    #[test]
    fn test_config_creation_with_default_values() {
        let config = create_test_config();

        assert_eq!(config.model_path, "/test/model.gguf");
        assert_eq!(config.llama_bin, "/test/llama-server");
        assert_eq!(config.api_port, 9999);
        assert_eq!(config.llama_port, 8001);
    }

    #[test]
    fn test_config_clone() {
        let config1 = create_test_config();
        let config2 = config1.clone();

        assert_eq!(config1.api_host, config2.api_host);
        assert_eq!(config1.threads, config2.threads);
        assert_eq!(config1.gpu_layers, config2.gpu_layers);
    }

    // --- api_addr ---

    #[test]
    fn test_api_addr_parsing() {
        let config = create_test_config();
        let addr = config.api_addr();

        assert_eq!(addr.ip().to_string(), "127.0.0.1");
        assert_eq!(addr.port(), 9999);
    }

    #[test]
    fn test_api_addr_with_different_ports() {
        let mut config = create_test_config();
        config.api_port = 3000;

        let addr = config.api_addr();
        assert_eq!(addr.port(), 3000);
    }

    #[test]
    fn test_api_addr_with_zero_address() {
        let mut config = create_test_config();
        config.api_host = "0.0.0.0".to_string();
        config.api_port = 5000;

        let addr = config.api_addr();
        assert_eq!(addr.port(), 5000);
        assert_eq!(addr.ip().to_string(), "0.0.0.0");
    }

    // --- timeout fields ---

    #[test]
    fn test_config_timeouts_are_positive() {
        let config = create_test_config();

        assert!(config.health_timeout_seconds > 0);
        assert!(config.generate_timeout_seconds > 0);
        assert!(config.stream_timeout_seconds > 0);
        assert!(config.health_check_timeout_seconds > 0);
    }

    #[test]
    fn test_health_check_timeout_greater_than_health_timeout() {
        let config = create_test_config();

        assert!(config.health_check_timeout_seconds >= config.health_timeout_seconds);
    }

    // --- throughput and queue limits ---

    #[test]
    fn test_max_concurrent_streams_is_positive() {
        let config = create_test_config();
        assert!(config.max_concurrent_streams > 0);
    }

    #[test]
    fn test_requests_per_second_is_reasonable() {
        let config = create_test_config();

        assert!(config.requests_per_second > 0);
        assert!(config.requests_per_second <= 1000);
    }

    #[test]
    fn test_queue_size_is_positive() {
        let config = create_test_config();
        assert!(config.queue_size > 0);
    }

    // --- inference sizing fields ---

    #[test]
    fn test_context_size_within_valid_range() {
        let config = create_test_config();

        assert!(config.ctx_size >= 512);
        assert!(config.ctx_size <= 32768);
    }

    #[test]
    fn test_batch_size_valid_range() {
        let config = create_test_config();

        assert!(config.batch_size >= 16);
        assert!(config.batch_size <= 1024);
    }

    #[test]
    fn test_batch_size_reasonable_vs_context() {
        let config = create_test_config();

        assert!(config.batch_size < config.ctx_size);
    }

    #[test]
    fn test_threads_is_positive() {
        let config = create_test_config();
        assert!(config.threads > 0);
    }

    #[test]
    fn test_threads_within_reasonable_range() {
        let config = create_test_config();

        assert!(config.threads <= 256);
    }

    #[test]
    fn test_gpu_layers_non_negative() {
        let config = create_test_config();
        assert!(config.gpu_layers <= config.ctx_size);
    }

    #[test]
    fn test_gpu_layers_within_range() {
        let config = create_test_config();

        assert!(config.gpu_layers <= 100);
    }

    // --- port fields ---

    #[test]
    fn test_api_port_valid() {
        let config = create_test_config();
        assert!(config.api_port > 0);
        assert!(config.api_port != config.llama_port);
    }

    #[test]
    fn test_llama_port_valid() {
        let config = create_test_config();
        assert!(config.llama_port > 0);
    }

    #[test]
    fn test_prometheus_port_valid() {
        let config = create_test_config();
        assert!(config.prometheus_port > 0);
    }

    #[test]
    fn test_ports_are_different() {
        let config = create_test_config();

        assert_ne!(config.api_port, config.llama_port);
        assert_ne!(config.api_port, config.prometheus_port);
        assert_ne!(config.llama_port, config.prometheus_port);
    }

    // --- required string fields ---

    #[test]
    fn test_model_path_not_empty() {
        let config = create_test_config();
        assert!(!config.model_path.is_empty());
    }

    #[test]
    fn test_llama_bin_not_empty() {
        let config = create_test_config();
        assert!(!config.llama_bin.is_empty());
    }

    #[test]
    fn test_backend_url_not_empty() {
        let config = create_test_config();
        assert!(!config.backend_url.is_empty());
    }

    #[test]
    fn test_backend_url_format() {
        let config = create_test_config();

        assert!(
            config.backend_url.starts_with("http://") || config.backend_url.starts_with("https://")
        );
    }

    #[test]
    fn test_api_host_not_empty() {
        let config = create_test_config();
        assert!(!config.api_host.is_empty());
    }

    #[test]
    fn test_llama_host_not_empty() {
        let config = create_test_config();
        assert!(!config.llama_host.is_empty());
    }

    // --- hot-swap grace period ---

    #[test]
    fn test_hot_swap_grace_positive() {
        let config = create_test_config();
        assert!(config.hot_swap_grace_seconds > 0);
    }

    #[test]
    fn test_hot_swap_grace_reasonable() {
        let config = create_test_config();

        assert!(config.hot_swap_grace_seconds < 300);
    }

    // --- auto-detection helpers (environment-dependent, so loose bounds) ---

    #[test]
    fn test_auto_detect_threads_returns_positive() {
        let threads = Config::auto_detect_threads();
        assert!(threads > 0);
    }

    #[test]
    fn test_auto_detect_gpu_layers_non_negative() {
        let layers = Config::auto_detect_gpu_layers();
        assert!(layers <= 512);
    }

    #[test]
    fn test_apply_batch_limits_small_context() {
        let batch = Config::apply_batch_limits(1024, 1024, false);
        assert!(batch <= 512);
    }

    #[test]
    fn test_apply_batch_limits_medium_context() {
        let batch = Config::apply_batch_limits(1024, 3000, false);
        assert!(batch <= 384);
    }

    #[test]
    fn test_apply_batch_limits_large_context() {
        let batch = Config::apply_batch_limits(1024, 24576, false);
        assert!(batch <= 64);
    }

    #[test]
    fn test_apply_batch_limits_minimum() {
        let batch = Config::apply_batch_limits(1, 8192, false);
        assert!(batch >= 16);
    }

    #[test]
    fn test_estimate_memory_per_batch_cpu() {
        let memory_cpu = Config::estimate_memory_per_batch(8192, false);
        assert!(memory_cpu > 0.0);
    }

    #[test]
    fn test_estimate_memory_per_batch_gpu() {
        let memory_gpu = Config::estimate_memory_per_batch(8192, true);
        assert!(memory_gpu > 0.0);
    }

    #[test]
    fn test_estimate_memory_gpu_less_than_cpu() {
        let memory_cpu = Config::estimate_memory_per_batch(8192, false);
        let memory_gpu = Config::estimate_memory_per_batch(8192, true);

        assert!(memory_gpu < memory_cpu);
    }

    // --- queue timing ---

    #[test]
    fn test_queue_timeout_is_positive() {
        let config = create_test_config();
        assert!(config.queue_timeout_seconds > 0);
    }

    #[test]
    fn test_queue_timeout_less_than_generate_timeout() {
        let config = create_test_config();

        assert!(config.queue_timeout_seconds <= config.generate_timeout_seconds);
    }

    // --- cross-field consistency ---

    #[test]
    fn test_config_values_consistency() {
        let config = create_test_config();

        assert!(config.health_timeout_seconds <= 3600);
        assert!(config.generate_timeout_seconds <= 1800);
        assert!(config.stream_timeout_seconds <= 3600);
        assert!(config.health_check_timeout_seconds <= 3600);
    }

    #[test]
    fn test_config_backend_url_consistency() {
        let config = create_test_config();

        assert!(
            config.backend_url.contains(&config.llama_host)
                || config.backend_url.contains("127.0.0.1")
                || config.backend_url.contains("localhost")
        );
    }

    #[test]
    fn test_config_all_fields_initialized() {
        let config = create_test_config();

        assert!(!config.model_path.is_empty());
        assert!(!config.llama_bin.is_empty());
        assert!(!config.api_host.is_empty());
        assert!(!config.llama_host.is_empty());
        assert!(config.threads > 0);
        assert!(config.gpu_layers <= config.ctx_size);
        assert!(config.api_port > 0);
        assert!(config.llama_port > 0);
    }
}