1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
//! Auto-configuration engine for Kandil Code
//!
//! Automatically selects optimal settings based on hardware capabilities.
use crate::config::layered::{
Config, FallbackConfig, ModelConfig, PerformanceConfig, Quantization,
};
use crate::core::hardware::HardwareProfile;
use crate::models::catalog::MODEL_CATALOG;
pub struct AutoConfig;
impl AutoConfig {
    /// Build a complete [`Config`] tuned to the given hardware profile.
    ///
    /// Selects a model from the catalog first, then derives context size,
    /// thread count, mmap usage and fallback settings from the same profile
    /// so the individual settings stay consistent with each other.
    pub fn from_hardware(profile: &HardwareProfile) -> Config {
        let model_spec = Self::select_model(profile);
        let performance = Self::tune_performance(profile, model_spec);
        let fallback = Self::configure_fallback(profile);
        Config {
            model: ModelConfig {
                name: model_spec.name.to_string(),
                path: None,
                context_size: Self::select_context_size(profile, model_spec),
                quantization: Quantization::Q4_K_M, // Default quantization
                threads: Some(performance.threads), // Mirror the tuned thread count
            },
            performance,
            fallback,
            strategy: crate::config::layered::StrategyConfig::default(),
        }
    }
    /// Select the best catalog model that fits the machine.
    ///
    /// Preference order:
    /// 1. Highest-quality model whose VRAM requirement (if any) fits the GPU.
    /// 2. Highest-quality CPU-only model that fits in RAM.
    /// 3. Smallest RAM-compatible model; as an absolute last resort the first
    ///    catalog entry (assumes the catalog is non-empty).
    fn select_model(profile: &HardwareProfile) -> &'static crate::models::catalog::ModelSpec {
        // Hard constraint first: the model must fit in total RAM.
        let compatible: Vec<_> = MODEL_CATALOG
            .iter()
            .filter(|m| m.ram_required_gb <= profile.total_ram_gb)
            .collect();
        // If a GPU is present, prefer the highest-quality model whose VRAM
        // requirement is either absent or satisfied by the GPU's memory.
        if let Some(gpu) = &profile.gpu {
            if let Some(best) = compatible
                .iter()
                .filter(|m| m.gpu_vram_min.is_none() || m.gpu_vram_min.unwrap() <= gpu.memory_gb)
                .max_by_key(|m| m.quality_rating.as_i32())
            {
                return best;
            }
        }
        // CPU-only fallback: highest-quality model with no VRAM requirement.
        if let Some(best) = compatible
            .iter()
            .filter(|m| m.gpu_vram_min.is_none())
            .max_by_key(|m| m.quality_rating.as_i32())
        {
            return best;
        }
        // Last resort: smallest RAM-compatible model. Reuse `compatible`
        // (previously this re-filtered the catalog with the same predicate)
        // and compare sizes with `total_cmp` — the former `size_gb as u64`
        // cast truncated fractional GB and could tie differently-sized models.
        compatible
            .iter()
            .min_by(|a, b| a.size_gb.total_cmp(&b.size_gb))
            .copied()
            .unwrap_or_else(|| MODEL_CATALOG.first().unwrap())
    }
    /// Pick the largest context size the model supports that also fits
    /// within the machine's available RAM.
    fn select_context_size(
        profile: &HardwareProfile,
        model: &crate::models::catalog::ModelSpec,
    ) -> usize {
        // Largest context the model itself supports.
        let max_supported = *model.context_sizes.iter().max().unwrap_or(&4096);
        // Estimate how many tokens fit in RAM, keeping 2 GB of headroom.
        let ram_based = if profile.total_ram_gb > 0 {
            // Rough per-token memory cost; larger models cost more per token.
            // NOTE(review): 0.5/0.25 MB-per-token are heuristic estimates —
            // confirm against actual KV-cache sizing for the catalog models.
            let memory_per_token_mb = if model.size_gb > 10.0 { 0.5 } else { 0.25 };
            let available_for_context = profile.available_ram_gb.saturating_sub(2) as f64;
            ((available_for_context * 1024.0) / memory_per_token_mb) as usize
        } else {
            4096 // No RAM information — assume a modest default budget.
        };
        // Use the smaller of model capability and the RAM-based estimate.
        let max_usable = max_supported.min(ram_based);
        // Largest supported context that fits. If none fits (e.g. very low
        // available RAM drives `max_usable` to 0), fall back to the model's
        // *smallest* supported context instead of a hard-coded 4096 the
        // model might not support at all.
        *model
            .context_sizes
            .iter()
            .filter(|&&size| size <= max_usable)
            .max()
            .or_else(|| model.context_sizes.iter().min())
            .unwrap_or(&4096) // Empty context_sizes list — safe default.
    }
    /// Derive thread count, mmap usage, batch size and RAM reservation from
    /// the hardware profile. `_model_spec` is currently unused but kept in
    /// the signature for future model-aware tuning.
    fn tune_performance(
        profile: &HardwareProfile,
        _model_spec: &crate::models::catalog::ModelSpec,
    ) -> PerformanceConfig {
        // Fewer threads on low-RAM machines to limit memory pressure;
        // otherwise cap at 8, a sensible ceiling for consumer CPUs.
        // (`clamp` replaces the former `.min(..).max(..)` chains.)
        let threads = if profile.total_ram_gb < 8 {
            profile.cpu_physical_cores.clamp(1, 4)
        } else {
            profile.cpu_physical_cores.clamp(1, 8)
        };
        // mmap only pays off when there is plenty of RAM for the page cache.
        let use_mmap = profile.total_ram_gb >= 16;
        PerformanceConfig {
            threads,
            use_mmap,
            batch_size: 512,         // Standard batch size
            target_latency_ms: 2000, // 2 second target for response time
            // Reserve ~25% of total RAM, clamped to the 2-8 GB range.
            reserve_ram_gb: (profile.total_ram_gb / 4).clamp(2, 8),
        }
    }
    /// Configure cloud fallback: enabled only on machines with less than
    /// 16 GB of RAM. The provider is left unset — the user must supply an
    /// API key/provider themselves.
    fn configure_fallback(profile: &HardwareProfile) -> FallbackConfig {
        FallbackConfig {
            enabled: profile.total_ram_gb < 16,
            cloud_provider: None,  // User must configure API key
            timeout_ms: 30000,     // 30 second timeout for fallback requests
        }
    }
}