1use crate::cmd::run_cmd;
23use serde::{Deserialize, Serialize};
24use std::sync::Mutex;
25
/// Process-wide cache holding the most recently stored snapshot together with
/// the `Instant` recorded alongside it; `None` until a snapshot has been stored.
static TELEMETRY_CACHE: Mutex<Option<(Telemetry, std::time::Instant)>> = Mutex::new(None);
/// Age limit, in seconds, below which `Telemetry::capture` returns the cached
/// snapshot instead of probing the host again.
const CACHE_TTL_SECS: u64 = 5;
28
/// A snapshot of host state, grouped by category.
///
/// Produced by `Telemetry::capture` and rendered for humans by
/// `Telemetry::print_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Telemetry {
    /// Seconds since the Unix epoch at which the snapshot was taken
    /// (0 if the system clock reads earlier than the epoch).
    pub timestamp: u64,
    /// CPU, memory, disk, uptime and load figures.
    pub system: SystemInfo,
    /// Accelerator devices and JAX availability.
    pub hardware: HardwareInfo,
    /// State of the vLLM service.
    pub services: ServiceInfo,
    /// Public address and tunnel state.
    pub network: NetworkInfo,
}
46
/// Basic system figures, collected by shelling out to `free`, `df`, `uptime`
/// and reading `/proc/cpuinfo`.
///
/// The `String` fields hold the command output as returned; the numeric fields
/// hold parsed values and fall back to zero when the output cannot be parsed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// First `model name` entry found in `/proc/cpuinfo`.
    pub cpu_model: String,
    /// Total RAM in human-readable units, as printed by `free -h`.
    pub ram_total: String,
    /// The "free" column (fourth field of the `Mem` row) of `free -h`.
    pub ram_free: String,
    /// Size of the root filesystem in human-readable units (`df -h /`).
    pub disk_total: String,
    /// Available space on the root filesystem in human-readable units (`df -h /`).
    pub disk_free: String,
    /// Usage percentage of the root filesystem with the `%` sign removed.
    pub disk_used_percent: String,
    /// Output of `uptime -p`.
    pub uptime: String,
    /// The text following `load average:` in the output of `uptime`.
    pub load_average: String,
    /// Total RAM in bytes (`free -b`); 0 if it could not be parsed.
    pub ram_total_bytes: u64,
    /// Free RAM in bytes (`free -b`); 0 if it could not be parsed.
    pub ram_free_bytes: u64,
    /// Size of the root filesystem in bytes; 0 if it could not be determined.
    pub disk_total_bytes: u64,
    /// Available space on the root filesystem in bytes; 0 if it could not be determined.
    pub disk_free_bytes: u64,
    /// `disk_used_percent` parsed as a number; 0.0 if it could not be parsed.
    pub disk_used_percent_numeric: f64,
}
78
/// Accelerator hardware and JAX runtime information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareInfo {
    /// Number of `/dev/accel*` entries found (0 if none or on error).
    pub tpu_devices: usize,
    /// Number of lines printed by `nvidia-smi --list-gpus` (0 if none or on error).
    pub gpu_devices: usize,
    /// Whether `import jax` succeeded under `python3` within the probe timeout.
    pub jax_available: bool,
    /// Value of `jax.__version__`; `None` when JAX is not available.
    pub jax_version: Option<String>,
    /// `len(jax.devices())`; `None` when JAX is not available or the probe
    /// output is not a number.
    pub jax_device_count: Option<usize>,
}
96
/// State of the vLLM inference service on this host.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceInfo {
    /// Value of `vllm.__version__`; `None` when the probe produced no output
    /// (reported as "not installed").
    pub vllm_version: Option<String>,
    /// Whether a process matching `vllm serve` was found.
    pub vllm_running: bool,
    /// Whether a listening TCP socket on port 8200 was found.
    pub vllm_port_bound: bool,
}
110
/// Public address and tunnel state of this host.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkInfo {
    /// Address reported by `ifconfig.me`, or `"unknown"` when the lookup fails.
    pub public_ip: String,
    /// Whether a process matching `cloudflared` was found.
    pub tunnel_running: bool,
    /// Raw process listing for the matched `cloudflared` process(es); `None`
    /// when no such process was found.
    pub tunnel_name: Option<String>,
}
121
122impl Telemetry {
123 pub fn capture() -> Self {
129 let now = std::time::Instant::now();
130 {
131 let cache = TELEMETRY_CACHE.lock().unwrap_or_else(|e| e.into_inner());
132 if let Some((cached, instant)) = cache.as_ref() {
133 if now.duration_since(*instant).as_secs() < CACHE_TTL_SECS {
134 return cached.clone();
135 }
136 }
137 }
138
139 let timestamp = std::time::SystemTime::now()
140 .duration_since(std::time::UNIX_EPOCH)
141 .map(|d| d.as_secs())
142 .unwrap_or(0);
143
144 let telemetry = Self {
145 timestamp,
146 system: SystemInfo::capture(),
147 hardware: HardwareInfo::capture(),
148 services: ServiceInfo::capture(),
149 network: NetworkInfo::capture(),
150 };
151
152 let mut cache = TELEMETRY_CACHE.lock().unwrap_or_else(|e| e.into_inner());
153 *cache = Some((telemetry.clone(), now));
154 telemetry
155 }
156
157 pub fn print_report(&self) {
159 println!("\n{}", "=".repeat(60));
160 println!(" RUNTIMO TELEMETRY [{}]", self.timestamp);
161 println!("{}", "=".repeat(60));
162
163 println!("\n--- SYSTEM ---");
164 println!(" CPU : {}", self.system.cpu_model);
165 println!(
166 " RAM : {} total, {} free",
167 self.system.ram_total, self.system.ram_free
168 );
169 println!(
170 " Disk : {} total, {} free ({}% used)",
171 self.system.disk_total, self.system.disk_free, self.system.disk_used_percent
172 );
173 println!(" Uptime: {}", self.system.uptime);
174 println!(" Load : {}", self.system.load_average);
175
176 println!("\n--- HARDWARE ---");
177 println!(" TPU Devices: {}", self.hardware.tpu_devices);
178 println!(" GPU Devices: {}", self.hardware.gpu_devices);
179 if self.hardware.jax_available {
180 println!(
181 " JAX: v{} ({} devices)",
182 self.hardware
183 .jax_version
184 .clone()
185 .unwrap_or_else(|| "unknown".into()),
186 self.hardware.jax_device_count.unwrap_or(0)
187 );
188 } else {
189 println!(" JAX: Not available");
190 }
191
192 println!("\n--- SERVICES ---");
193 match &self.services.vllm_version {
194 Some(v) => println!(
195 " vLLM: v{} ({})",
196 v,
197 if self.services.vllm_running {
198 "running"
199 } else {
200 "not running"
201 }
202 ),
203 None => println!(" vLLM: not installed"),
204 }
205 println!(
206 " Port 8200: {}",
207 if self.services.vllm_port_bound {
208 "BOUND"
209 } else {
210 "NOT BOUND"
211 }
212 );
213
214 println!("\n--- NETWORK ---");
215 println!(" Public IP: {}", self.network.public_ip);
216 println!(
217 " Tunnel: {} ({})",
218 if self.network.tunnel_running {
219 "running"
220 } else {
221 "not running"
222 },
223 self.network
224 .tunnel_name
225 .clone()
226 .unwrap_or_else(|| "unknown".into())
227 );
228
229 println!("\n{}", "=".repeat(60));
230 }
231}
232
233impl SystemInfo {
234 fn capture() -> Self {
235 let ram_total = run_cmd("free -h | grep Mem | awk '{print $2}'");
236 let ram_free = run_cmd("free -h | grep Mem | awk '{print $4}'");
237 let disk_total = run_cmd("df -h / | tail -1 | awk '{print $2}'");
238 let disk_free = run_cmd("df -h / | tail -1 | awk '{print $4}'");
239 let disk_pct_str = run_cmd("df / | tail -1 | awk '{print $5}'");
240 let disk_used_percent = disk_pct_str.replace('%', "");
241 let disk_used_percent_numeric = disk_used_percent.parse::<f64>().unwrap_or(0.0);
242 let ram_total_bytes = run_cmd("free -b | grep Mem | awk '{print $2}'")
243 .parse()
244 .unwrap_or(0);
245 let ram_free_bytes = run_cmd("free -b | grep Mem | awk '{print $4}'")
246 .parse()
247 .unwrap_or(0);
248 let disk_total_bytes = run_cmd("df --bytes / | tail -1 | awk '{print $2}'")
249 .parse()
250 .unwrap_or(0);
251 let disk_free_bytes = run_cmd("df --bytes / | tail -1 | awk '{print $4}'")
252 .parse()
253 .unwrap_or(0);
254
255 Self {
256 cpu_model: run_cmd("cat /proc/cpuinfo | grep 'model name' | head -1 | cut -d: -f2"),
257 ram_total,
258 ram_free,
259 disk_total,
260 disk_free,
261 disk_used_percent,
262 uptime: run_cmd("uptime -p"),
263 load_average: run_cmd("uptime | awk -F'load average:' '{print $2}'"),
264 ram_total_bytes,
265 ram_free_bytes,
266 disk_total_bytes,
267 disk_free_bytes,
268 disk_used_percent_numeric,
269 }
270 }
271}
272
273impl HardwareInfo {
274 fn capture() -> Self {
275 let tpu_devices = run_cmd("ls /dev/accel* 2>/dev/null | wc -l")
276 .parse()
277 .unwrap_or(0);
278
279 let gpu_devices = run_cmd("nvidia-smi --list-gpus 2>/dev/null | wc -l")
280 .parse()
281 .unwrap_or(0);
282
283 let jax_available =
284 run_cmd("timeout 10 python3 -c 'import jax' 2>/dev/null && echo yes || echo no") == "yes";
285 let jax_version = if jax_available {
286 Some(run_cmd("timeout 10 python3 -c 'import jax; print(jax.__version__)'"))
287 } else {
288 None
289 };
290 let jax_device_count = if jax_available {
291 run_cmd("timeout 10 python3 -c 'import jax; print(len(jax.devices()))'")
292 .parse()
293 .ok()
294 } else {
295 None
296 };
297
298 Self {
299 tpu_devices,
300 gpu_devices,
301 jax_available,
302 jax_version,
303 jax_device_count,
304 }
305 }
306}
307
308impl ServiceInfo {
309 fn capture() -> Self {
310 let vllm_version = run_cmd("timeout 10 python3 -c 'import vllm; print(vllm.__version__)' 2>/dev/null");
311 let vllm_running = !run_cmd("pgrep -fa 'vllm serve'").is_empty();
312 let vllm_port_bound =
313 !run_cmd("ss -ltn '( sport = :8200 )' 2>/dev/null | grep 8200").is_empty();
314
315 Self {
316 vllm_version: if vllm_version.is_empty() {
317 None
318 } else {
319 Some(vllm_version)
320 },
321 vllm_running,
322 vllm_port_bound,
323 }
324 }
325}
326
327impl NetworkInfo {
328 fn capture() -> Self {
329 let public_ip = run_cmd("curl -s --connect-timeout 5 --max-time 5 ifconfig.me 2>/dev/null || echo 'unknown'");
330 let tunnel_output = run_cmd("pgrep -fa cloudflared");
331 let tunnel_running = !tunnel_output.is_empty();
332 let tunnel_name = if tunnel_running {
333 Some(tunnel_output)
334 } else {
335 None
336 };
337
338 Self {
339 public_ip,
340 tunnel_running,
341 tunnel_name,
342 }
343 }
344}
345
#[cfg(test)]
mod tests {
    use super::Telemetry;

    /// Smoke test: a capture completes and carries a non-zero timestamp.
    #[test]
    fn test_telemetry_capture() {
        let snapshot = Telemetry::capture();
        assert_ne!(snapshot.timestamp, 0);
    }
}