// car_inference/hardware.rs — hardware detection and model recommendation.
use serde::{Deserialize, Serialize};

5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct HardwareInfo {
7 pub os: String,
8 pub arch: String,
9 pub cpu_cores: usize,
10 pub total_ram_mb: u64,
11 pub gpu_backend: GpuBackend,
12 pub gpu_memory_mb: Option<u64>,
13 pub recommended_model: String,
15 pub recommended_context: usize,
17 pub max_model_mb: u64,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22#[serde(rename_all = "snake_case")]
23pub enum GpuBackend {
24 Metal,
25 Cuda,
26 Cpu,
27}
28
29impl HardwareInfo {
30 pub fn detect() -> Self {
32 let os = detect_os();
33 let arch = std::env::consts::ARCH.to_string();
34 let cpu_cores = std::thread::available_parallelism()
35 .map(|n| n.get()).unwrap_or(1);
36 let total_ram_mb = detect_ram_mb();
37 let gpu_backend = detect_gpu_backend();
38 let gpu_memory_mb = detect_gpu_memory_mb(&gpu_backend, total_ram_mb);
39
40 let available_mb = gpu_memory_mb.unwrap_or(total_ram_mb);
46 let embedding_model_mb: u64 = 700; let max_model_mb = ((available_mb as f64 * 0.6) as u64).saturating_sub(embedding_model_mb);
48
49 let recommended_model = recommend_model(max_model_mb);
50 let recommended_context = recommend_context(available_mb, &recommended_model);
51
52 Self {
53 os, arch, cpu_cores, total_ram_mb,
54 gpu_backend, gpu_memory_mb,
55 recommended_model, recommended_context, max_model_mb,
56 }
57 }
58}
59
/// Operating system name as a lowercase string.
///
/// The explicit `cfg!` branches mirror `std::env::consts::OS` for the three
/// platforms this crate targets; anything else falls through to that constant.
fn detect_os() -> String {
    let name = if cfg!(target_os = "macos") {
        "macos"
    } else if cfg!(target_os = "linux") {
        "linux"
    } else if cfg!(target_os = "windows") {
        "windows"
    } else {
        std::env::consts::OS
    };
    name.to_string()
}
66
/// Total physical RAM in MiB, falling back to 8192 (8 GiB) when the
/// platform probe fails or the platform is unsupported.
fn detect_ram_mb() -> u64 {
    #[cfg(target_os = "macos")]
    {
        // `sysctl -n hw.memsize` prints the physical memory size in bytes.
        let bytes = std::process::Command::new("sysctl")
            .args(["-n", "hw.memsize"])
            .output()
            .ok()
            .and_then(|out| String::from_utf8(out.stdout).ok())
            .and_then(|s| s.trim().parse::<u64>().ok());
        if let Some(b) = bytes {
            return b / (1024 * 1024);
        }
    }
    #[cfg(target_os = "linux")]
    {
        // /proc/meminfo carries a line like: "MemTotal:  16331712 kB"
        if let Ok(content) = std::fs::read_to_string("/proc/meminfo") {
            let kib = content
                .lines()
                .find(|line| line.starts_with("MemTotal:"))
                .and_then(|line| line.split_whitespace().nth(1))
                .and_then(|value| value.parse::<u64>().ok());
            if let Some(kib) = kib {
                return kib / 1024;
            }
        }
    }
    // Probe failed or unknown platform: assume a modest 8 GiB.
    8192
}
101
102fn detect_gpu_backend() -> GpuBackend {
103 #[cfg(feature = "metal")]
104 { return GpuBackend::Metal; }
105 #[cfg(feature = "cuda")]
106 { return GpuBackend::Cuda; }
107 #[cfg(not(any(feature = "metal", feature = "cuda")))]
108 {
109 if cfg!(target_os = "macos") && cfg!(target_arch = "aarch64") {
111 GpuBackend::Metal
112 } else {
113 GpuBackend::Cpu
114 }
115 }
116}
117
118fn detect_gpu_memory_mb(backend: &GpuBackend, total_ram_mb: u64) -> Option<u64> {
119 match backend {
120 GpuBackend::Metal => {
121 Some(total_ram_mb.saturating_sub(4096))
124 }
125 GpuBackend::Cuda => {
126 #[cfg(target_os = "linux")]
128 {
129 if let Ok(output) = std::process::Command::new("nvidia-smi")
130 .args(["--query-gpu=memory.total", "--format=csv,noheader,nounits"])
131 .output()
132 {
133 if let Ok(s) = String::from_utf8(output.stdout) {
134 if let Ok(mb) = s.trim().parse::<u64>() {
135 return Some(mb);
136 }
137 }
138 }
139 }
140 None
141 }
142 GpuBackend::Cpu => None,
143 }
144}
145
/// Map a weight budget (MiB) to the largest Qwen3 model that fits.
///
/// Tiers are checked largest-first; anything below the smallest tier gets
/// the 0.6B fallback model.
fn recommend_model(max_model_mb: u64) -> String {
    // (minimum MiB required, model name), ordered largest first.
    const TIERS: [(u64, &str); 4] = [
        (17000, "Qwen3-30B-A3B"),
        (4900, "Qwen3-8B"),
        (2500, "Qwen3-4B"),
        (1800, "Qwen3-1.7B"),
    ];
    TIERS
        .iter()
        .find(|(min_mb, _)| max_model_mb >= *min_mb)
        .map(|(_, name)| (*name).to_string())
        .unwrap_or_else(|| "Qwen3-0.6B".to_string())
}
160
/// Suggest a context length (tokens) for `model_name` given `available_mb`
/// of memory, clamped to the range [2048, 131072].
///
/// The KV-cache budget is whatever remains after subtracting the model's
/// weights and a 1 GiB reserve; each model has an approximate KV-cache
/// cost in MiB per 1k tokens.
fn recommend_context(available_mb: u64, model_name: &str) -> usize {
    // (weights MiB, KV-cache MiB per 1k tokens). Unknown names get the
    // smallest model's figures, matching "Qwen3-0.6B".
    let (model_mb, kv_cost_per_1k): (u64, f64) = match model_name {
        "Qwen3-1.7B" => (1800, 0.3),
        "Qwen3-4B" => (2500, 0.5),
        "Qwen3-8B" => (4900, 1.0),
        "Qwen3-30B-A3B" => (17000, 1.5),
        _ => (650, 0.1), // "Qwen3-0.6B" and any unrecognized name
    };

    let kv_budget_mb = available_mb
        .saturating_sub(model_mb)
        .saturating_sub(1024) as f64;
    let max_context = (kv_budget_mb / kv_cost_per_1k * 1000.0) as usize;

    max_context.clamp(2048, 131072)
}