// offline_intelligence/model_management/recommendation.rs

use super::registry::ModelInfo;
10use crate::config::Config;
11use serde::{Deserialize, Serialize};
12use tracing::{debug, info};
13
14#[cfg(target_os = "macos")]
16use sysinfo::System;
17
/// User-supplied preferences that bias model recommendation scoring.
///
/// Consumed by `ModelRecommender::score_model_compatibility` and its helper
/// scorers; defaults are provided via the `Default` impl below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserPreferences {
    /// Primary workload the user intends to run.
    pub primary_use_case: UseCase,
    /// How strongly the user favors output quality.
    pub quality_preference: QualityPreference,
    /// How strongly the user favors response latency.
    pub speed_preference: SpeedPreference,
    /// Tolerance for resource cost.
    pub cost_sensitivity: CostSensitivity,
    /// Model file formats the user prefers (e.g. "gguf", case-insensitive);
    /// a matching format earns a scoring bonus.
    pub preferred_formats: Vec<String>,
}
27
/// Primary workload category; models whose tags match the selected use case
/// receive a multiplier bonus in `score_use_case_alignment`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum UseCase {
    ChatAssistant,
    CodeGeneration,
    CreativeWriting,
    ResearchAnalysis,
    Translation,
    GeneralPurpose,
}
38
/// How strongly the user weighs output quality when ranking models.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum QualityPreference {
    HighQuality,
    Balanced,
    FastResponse,
}
46
/// How strongly the user weighs response speed when ranking models.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SpeedPreference {
    Fastest,
    Balanced,
    HighestQuality,
}
54
/// User's tolerance for resource/operating cost.
// NOTE(review): currently stored but not referenced by any scorer in this
// file — presumably reserved for future use; confirm before removing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum CostSensitivity {
    Budget,
    Moderate,
    Premium,
}
62
63impl Default for UserPreferences {
64 fn default() -> Self {
65 Self {
66 primary_use_case: UseCase::GeneralPurpose,
67 quality_preference: QualityPreference::Balanced,
68 speed_preference: SpeedPreference::Balanced,
69 cost_sensitivity: CostSensitivity::Moderate,
70 preferred_formats: vec!["gguf".to_string()], }
72 }
73}
74
/// Snapshot of the host machine's capabilities, produced by
/// `ModelRecommender::detect_hardware_profile`.
#[derive(Debug, Clone)]
pub struct HardwareProfile {
    /// Total physical RAM in GiB.
    pub total_ram_gb: f32,
    /// Currently available RAM in GiB.
    pub available_ram_gb: f32,
    /// CPU count reported by `num_cpus::get()`.
    pub cpu_cores: u32,
    /// Thread count taken from the service `Config`, not from hardware.
    pub cpu_threads: u32,
    /// Whether a usable GPU was detected (NVIDIA via NVML, or Apple Metal).
    pub gpu_available: bool,
    /// Dedicated VRAM in GiB, if known. On Apple Silicon this is the total
    /// unified memory.
    pub gpu_vram_gb: Option<f32>,
    /// CUDA compute capability, if known; `None` on non-NVIDIA hardware.
    pub gpu_compute_capability: Option<f32>,
    /// Compile-time architecture string (`std::env::consts::ARCH`).
    pub system_architecture: String,
}
87
/// Scores and ranks available models against the host hardware and the
/// stored user preferences.
pub struct ModelRecommender {
    // Preferences used to bias compatibility scores; starts at
    // `UserPreferences::default()` and is replaced via `set_preferences`.
    user_preferences: UserPreferences,
}
92
93impl ModelRecommender {
94 pub fn new() -> Self {
95 Self {
96 user_preferences: UserPreferences::default(),
97 }
98 }
99
100 pub fn set_preferences(&mut self, preferences: UserPreferences) {
102 self.user_preferences = preferences;
103 info!("Updated user preferences: {:?}", self.user_preferences);
104 }
105
106 pub fn get_preferences(&self) -> &UserPreferences {
108 &self.user_preferences
109 }
110
111 pub fn detect_hardware_profile(config: &Config) -> HardwareProfile {
113 let mut system = sysinfo::System::new_all();
114 system.refresh_memory();
115 system.refresh_cpu();
116
117 let total_ram_gb = system.total_memory() as f32 / (1024.0 * 1024.0 * 1024.0);
118 let available_ram_gb = system.available_memory() as f32 / (1024.0 * 1024.0 * 1024.0);
119 let cpu_cores = num_cpus::get() as u32;
120 let cpu_threads = config.threads;
121
122 let (gpu_available, gpu_vram_gb, gpu_compute_capability) = Self::detect_gpu();
124
125 HardwareProfile {
126 total_ram_gb,
127 available_ram_gb,
128 cpu_cores,
129 cpu_threads,
130 gpu_available,
131 gpu_vram_gb,
132 gpu_compute_capability,
133 system_architecture: std::env::consts::ARCH.to_string(),
134 }
135 }
136
137 fn detect_gpu() -> (bool, Option<f32>, Option<f32>) {
139 #[cfg(all(feature = "nvidia", any(target_os = "windows", target_os = "linux")))]
141 {
142 match nvml_wrapper::Nvml::init() {
143 Ok(nvml) => {
144 match nvml.device_count() {
145 Ok(count) if count > 0 => {
146 match nvml.device_by_index(0) {
147 Ok(device) => {
148 let vram_bytes =
149 device.memory_info().map(|mem| mem.total).unwrap_or(0);
150 let vram_gb = if vram_bytes > 0 {
151 Some(vram_bytes as f32 / (1024.0 * 1024.0 * 1024.0))
152 } else {
153 None
154 };
155
156 let compute_capability = Some(7.5); (true, vram_gb, compute_capability)
160 }
161 Err(_) => (false, None, None),
162 }
163 }
164 _ => (false, None, None),
165 }
166 }
167 Err(_) => (false, None, None),
168 }
169 }
170
171 #[cfg(all(not(feature = "nvidia"), any(target_os = "windows", target_os = "linux")))]
173 {
174 (false, None, None)
176 }
177
178 #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
180 {
181 use sysinfo::System;
182 let mut sys = System::new_all();
183 sys.refresh_memory();
184 let unified_mem_gb = sys.total_memory() as f32 / (1024.0 * 1024.0 * 1024.0);
186 info!(
188 "Apple Silicon detected with Metal GPU, unified memory: {:.1} GB",
189 unified_mem_gb
190 );
191 (true, Some(unified_mem_gb), None)
192 }
193
194 #[cfg(all(target_os = "macos", target_arch = "x86_64"))]
196 {
197 info!("Intel Mac detected, no GPU acceleration available");
198 (false, None, None)
199 }
200 }
201
202 pub fn score_model_compatibility(&self, model: &ModelInfo, hardware: &HardwareProfile) -> f32 {
204 let mut score = 1.0f32;
205
206 let model_ram_gb = (model.size_bytes as f32) / (1024.0 * 1024.0 * 1024.0) * 1.5; if model_ram_gb > hardware.available_ram_gb {
210 score *= 0.3; } else if model_ram_gb > hardware.total_ram_gb * 0.8 {
212 score *= 0.7; }
214
215 let requires_gpu = model.tags.contains(&"gpu".to_string())
217 || model.format.eq_ignore_ascii_case("tensorrt");
218
219 if requires_gpu && !hardware.gpu_available {
220 score *= 0.2; } else if requires_gpu && hardware.gpu_available {
222 if let Some(vram_gb) = hardware.gpu_vram_gb {
223 let required_vram = match model.size_bytes {
224 s if s < 5 * 1024 * 1024 * 1024 => 6.0, s if s < 10 * 1024 * 1024 * 1024 => 12.0,
226 _ => 24.0,
227 };
228
229 if vram_gb < required_vram * 0.8 {
230 score *= 0.5; }
232 }
233 }
234
235 if self
237 .user_preferences
238 .preferred_formats
239 .iter()
240 .any(|f| model.format.eq_ignore_ascii_case(f))
241 {
242 score *= 1.2; }
244
245 score *= self.score_use_case_alignment(model);
247
248 score *= self.score_quality_speed_preference(model);
250
251 score.clamp(0.0, 1.0)
252 }
253
254 fn score_use_case_alignment(&self, model: &ModelInfo) -> f32 {
256 let use_case_tags: Vec<&str> = model.tags.iter().map(|s| s.as_str()).collect();
257
258 match self.user_preferences.primary_use_case {
259 UseCase::ChatAssistant => {
260 if use_case_tags.contains(&"chat") || use_case_tags.contains(&"instruction") {
261 1.3
262 } else if use_case_tags.contains(&"general") {
263 1.1
264 } else {
265 1.0
266 }
267 }
268 UseCase::CodeGeneration => {
269 if use_case_tags.contains(&"code") || use_case_tags.contains(&"programming") {
270 1.4
271 } else {
272 1.0
273 }
274 }
275 UseCase::CreativeWriting => {
276 if use_case_tags.contains(&"creative") || use_case_tags.contains(&"story") {
277 1.3
278 } else if use_case_tags.contains(&"text-generation") {
279 1.1
280 } else {
281 1.0
282 }
283 }
284 UseCase::ResearchAnalysis => {
285 if use_case_tags.contains(&"research") || use_case_tags.contains(&"analysis") {
286 1.3
287 } else {
288 1.0
289 }
290 }
291 UseCase::Translation => {
292 if use_case_tags.contains(&"translation") || use_case_tags.contains(&"multilingual")
293 {
294 1.4
295 } else {
296 1.0
297 }
298 }
299 UseCase::GeneralPurpose => 1.0,
300 }
301 }
302
303 fn score_quality_speed_preference(&self, model: &ModelInfo) -> f32 {
305 let model_size_category = if model.size_bytes < 3 * 1024 * 1024 * 1024 {
306 "small" } else if model.size_bytes < 10 * 1024 * 1024 * 1024 {
308 "medium" } else {
310 "large" };
312
313 match (
314 &self.user_preferences.quality_preference,
315 &self.user_preferences.speed_preference,
316 ) {
317 (QualityPreference::HighQuality, SpeedPreference::HighestQuality) => {
318 match model_size_category {
319 "large" => 1.3,
320 "medium" => 1.1,
321 "small" => 0.8,
322 _ => 1.0,
323 }
324 }
325 (QualityPreference::FastResponse, SpeedPreference::Fastest) => {
326 match model_size_category {
327 "small" => 1.3,
328 "medium" => 1.0,
329 "large" => 0.6,
330 _ => 1.0,
331 }
332 }
333 _ => 1.0, }
335 }
336
337 pub fn get_recommendations(
339 &self,
340 models: Vec<&ModelInfo>,
341 hardware: &HardwareProfile,
342 max_results: usize,
343 ) -> Vec<(String, f32)> {
344 let mut scored_models: Vec<(String, f32)> = models
345 .iter()
346 .map(|model| {
347 let score = self.score_model_compatibility(model, hardware);
348 (model.id.clone(), score)
349 })
350 .collect();
351
352 scored_models.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
354
355 scored_models.truncate(max_results);
357
358 debug!("Generated {} model recommendations", scored_models.len());
359 scored_models
360 }
361
362 pub fn get_hardware_recommendation_message(&self, hardware: &HardwareProfile) -> String {
364 let mut recommendations = Vec::new();
365
366 if hardware.total_ram_gb < 8.0 {
367 recommendations
368 .push("Consider upgrading RAM to 8GB+ for better model performance".to_string());
369 }
370
371 if !hardware.gpu_available && hardware.total_ram_gb >= 16.0 {
372 recommendations.push(
373 "A GPU would significantly accelerate inference for larger models".to_string(),
374 );
375 }
376
377 if hardware.gpu_available {
378 if let Some(vram) = hardware.gpu_vram_gb {
379 if vram < 8.0 {
380 recommendations.push(format!(
381 "With {:.1}GB VRAM, you can run medium-sized models efficiently",
382 vram
383 ));
384 } else if vram >= 16.0 {
385 recommendations.push(format!(
386 "With {:.1}GB VRAM, you can run large models with full GPU acceleration",
387 vram
388 ));
389 }
390 }
391 }
392
393 if recommendations.is_empty() {
394 "Your system configuration is well-suited for running various model sizes".to_string()
395 } else {
396 recommendations.join("\n")
397 }
398 }
399}
400
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;

    /// Smoke test: hardware detection must report non-zero RAM and at least
    /// one CPU core on any machine the suite runs on.
    #[test]
    fn test_hardware_detection() {
        // Minimal Config fixture; only `threads` feeds into the detected
        // profile — the remaining required fields are placeholder values.
        let config = Config {
            model_path: "".to_string(),
            llama_bin: "".to_string(),
            llama_host: "127.0.0.1".to_string(),
            llama_port: 8001,
            ctx_size: 8192,
            batch_size: 128,
            threads: 6,
            gpu_layers: 20,
            health_timeout_seconds: 600,
            hot_swap_grace_seconds: 25,
            max_concurrent_streams: 2,
            prometheus_port: 9000,
            api_host: "127.0.0.1".to_string(),
            api_port: 9999,
            requests_per_second: 24,
            generate_timeout_seconds: 300,
            stream_timeout_seconds: 600,
            health_check_timeout_seconds: 900,
            queue_size: 1000,
            queue_timeout_seconds: 300,
            backend_url: "http://127.0.0.1:8001".to_string(),
            openrouter_api_key: "".to_string(),
        };

        let hardware = ModelRecommender::detect_hardware_profile(&config);
        assert!(hardware.total_ram_gb > 0.0);
        assert!(hardware.cpu_cores > 0);
    }
}
437}