1use crate::cmd::run_cmd;
26use serde::{Deserialize, Serialize};
27use std::sync::Mutex;
28
/// Process-wide memo of the most recent [`Telemetry`] snapshot, paired with the monotonic
/// `Instant` taken at the start of the capture that produced it. `None` until a capture
/// has completed.
static TELEMETRY_CACHE: Mutex<Option<(Telemetry, std::time::Instant)>> = Mutex::new(None);
/// Age limit in seconds: `Telemetry::capture` returns the cached snapshot only while it is
/// strictly younger than this.
const CACHE_TTL_SECS: u64 = 30;
31
/// One point-in-time snapshot of the host, grouped by concern.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Telemetry {
    /// Wall-clock time of the snapshot in whole seconds since the Unix epoch
    /// (0 if the system clock reads earlier than the epoch).
    pub timestamp: u64,
    /// CPU, memory, root-filesystem and uptime facts.
    pub system: SystemInfo,
    /// Detected accelerators and JAX availability.
    pub hardware: HardwareInfo,
    /// Services recognised from listening ports.
    pub services: ServiceInfo,
    /// Public address and tunnel status.
    pub network: NetworkInfo,
}
49
/// Basic host facts, kept both as display strings and as numeric values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// CPU model text taken from the first `model name` entry of `/proc/cpuinfo`.
    pub cpu_model: String,
    /// Total RAM in human-readable units.
    pub ram_total: String,
    /// Free RAM in human-readable units.
    pub ram_free: String,
    /// Size of the root filesystem in human-readable units.
    pub disk_total: String,
    /// Space available on the root filesystem in human-readable units.
    pub disk_free: String,
    /// Root filesystem usage percentage as text, with the `%` sign removed.
    pub disk_used_percent: String,
    /// Output of `uptime -p`.
    pub uptime: String,
    /// The part of `uptime` output that follows `load average:`.
    pub load_average: String,
    /// Total RAM in bytes; 0 when the value could not be parsed.
    pub ram_total_bytes: u64,
    /// Free RAM in bytes; 0 when the value could not be parsed.
    pub ram_free_bytes: u64,
    /// Root filesystem size in bytes; 0 when the value could not be parsed.
    pub disk_total_bytes: u64,
    /// Root filesystem available space in bytes; 0 when the value could not be parsed.
    pub disk_free_bytes: u64,
    /// `disk_used_percent` parsed as a number; 0.0 when it could not be parsed.
    pub disk_used_percent_numeric: f64,
}
81
/// Accelerator inventory plus JAX availability.
///
/// Every field is `#[serde(default)]`, so a payload that lacks a field still deserializes
/// and that field takes its type's default value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareInfo {
    /// One entry per detected accelerator family; empty when none were found or when the
    /// field is absent from the deserialized payload.
    #[serde(default)]
    pub accelerators: Vec<AcceleratorInfo>,
    /// Whether `import jax` succeeded under `python3`.
    #[serde(default)]
    pub jax_available: bool,
    /// Version string reported by JAX, when it is available.
    #[serde(default)]
    pub jax_version: Option<String>,
    /// Number of devices JAX reports, when that could be parsed.
    #[serde(default)]
    pub jax_device_count: Option<usize>,

    /// Sum of `count` over `accelerators` entries whose `kind` is `"tpu"` at capture time.
    #[serde(default)]
    pub tpu_devices: usize,
    /// Sum of `count` over `accelerators` entries whose `kind` is `"gpu"` at capture time.
    #[serde(default)]
    pub gpu_devices: usize,
}
108
/// A group of identical accelerators found on the host.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcceleratorInfo {
    /// Accelerator family; the capture code in this file emits `"tpu"` or `"gpu"`.
    pub kind: String,
    /// How many devices of this kind were counted.
    pub count: usize,
    /// Vendor label (for example `"nvidia"`), when known; defaults to `None` if absent.
    #[serde(default)]
    pub vendor: Option<String>,
    /// Model label, when known; defaults to `None` if absent.
    #[serde(default)]
    pub model: Option<String>,
}
123
/// Services recognised on the host, plus flattened vLLM fields.
///
/// Every field is `#[serde(default)]`, so payloads that lack a field still deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceInfo {
    /// One entry per recognised service name.
    #[serde(default)]
    pub detected_services: Vec<DetectedService>,

    /// Version of the detected service named `"vllm"`, if there is one.
    #[serde(default)]
    pub vllm_version: Option<String>,
    /// Whether a detected service named `"vllm"` is marked as running.
    #[serde(default)]
    pub vllm_running: bool,
    /// Whether the detected `"vllm"` service lists port 8200.
    #[serde(default)]
    pub vllm_port_bound: bool,
}
143
/// A single service inferred from a listening port.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedService {
    /// Short service identifier such as `"ssh"` or `"nginx"`.
    pub name: String,
    /// Version string reported by the service's own binary, when one was obtained.
    #[serde(default)]
    pub version: Option<String>,
    /// Whether the service is considered running; defaults to `false` if absent.
    #[serde(default)]
    pub running: bool,
    /// Listening ports attributed to this service; defaults to empty if absent.
    #[serde(default)]
    pub ports: Vec<u16>,
}
159
/// Externally visible address and tunnel status.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkInfo {
    /// Public IP as returned by an external lookup; the probe command echoes `unknown`
    /// when the lookup fails.
    pub public_ip: String,
    /// Whether a `cloudflared` process was found.
    pub tunnel_running: bool,
    /// Set only when `tunnel_running` is true. The capture code stores the raw output of the
    /// process lookup here rather than a parsed tunnel name.
    pub tunnel_name: Option<String>,
}
170
171impl Telemetry {
172 pub fn capture() -> Self {
178 let now = std::time::Instant::now();
179 {
180 let cache = TELEMETRY_CACHE.lock().unwrap_or_else(|e| e.into_inner());
181 if let Some((cached, instant)) = cache.as_ref() {
182 if now.duration_since(*instant).as_secs() < CACHE_TTL_SECS {
183 return cached.clone();
184 }
185 }
186 }
187
188 let timestamp = std::time::SystemTime::now()
189 .duration_since(std::time::UNIX_EPOCH)
190 .map(|d| d.as_secs())
191 .unwrap_or(0);
192
193 let telemetry = Self {
194 timestamp,
195 system: SystemInfo::capture(),
196 hardware: HardwareInfo::capture(),
197 services: ServiceInfo::capture(),
198 network: NetworkInfo::capture(),
199 };
200
201 let mut cache = TELEMETRY_CACHE.lock().unwrap_or_else(|e| e.into_inner());
202 *cache = Some((telemetry.clone(), now));
203 telemetry
204 }
205
206 pub fn print_report(&self) {
208 println!("\n{}", "=".repeat(60));
209 println!(" RUNTIMO TELEMETRY [{}]", self.timestamp);
210 println!("{}", "=".repeat(60));
211
212 println!("\n--- SYSTEM ---");
213 println!(" CPU : {}", self.system.cpu_model);
214 println!(
215 " RAM : {} total, {} free",
216 self.system.ram_total, self.system.ram_free
217 );
218 println!(
219 " Disk : {} total, {} free ({}% used)",
220 self.system.disk_total, self.system.disk_free, self.system.disk_used_percent
221 );
222 println!(" Uptime: {}", self.system.uptime);
223 println!(" Load : {}", self.system.load_average);
224
225 println!("\n--- HARDWARE ---");
226 if self.hardware.accelerators.is_empty() {
227 println!(" Accelerators: none detected");
228 } else {
229 for acc in &self.hardware.accelerators {
230 println!(
231 " {}: {}x {}{}",
232 acc.kind,
233 acc.count,
234 acc.model.as_deref().unwrap_or("unknown"),
235 acc.vendor
236 .as_ref()
237 .map(|v| format!(" ({})", v))
238 .unwrap_or_default()
239 );
240 }
241 }
242 if self.hardware.jax_available {
243 println!(
244 " JAX: v{} ({} devices)",
245 self.hardware
246 .jax_version
247 .clone()
248 .unwrap_or_else(|| "unknown".into()),
249 self.hardware.jax_device_count.unwrap_or(0)
250 );
251 }
252
253 println!("\n--- SERVICES ---");
254 if self.services.detected_services.is_empty() {
255 println!(" Services: none detected");
256 } else {
257 for svc in &self.services.detected_services {
258 let ports_str = if svc.ports.is_empty() {
259 String::new()
260 } else {
261 format!(
262 " ports=[{}]",
263 svc.ports
264 .iter()
265 .map(|p| p.to_string())
266 .collect::<Vec<_>>()
267 .join(",")
268 )
269 };
270 println!(
271 " {}: v{} ({}){}",
272 svc.name,
273 svc.version.as_deref().unwrap_or("?"),
274 if svc.running { "running" } else { "stopped" },
275 ports_str
276 );
277 }
278 }
279
280 println!("\n--- NETWORK ---");
281 println!(" Public IP: {}", self.network.public_ip);
282 println!(
283 " Tunnel: {} ({})",
284 if self.network.tunnel_running {
285 "running"
286 } else {
287 "not running"
288 },
289 self.network
290 .tunnel_name
291 .clone()
292 .unwrap_or_else(|| "unknown".into())
293 );
294
295 println!("\n{}", "=".repeat(60));
296 }
297}
298
299impl SystemInfo {
300 fn capture() -> Self {
301 let ram_total = run_cmd("free -h | grep Mem | awk '{print $2}'");
302 let ram_free = run_cmd("free -h | grep Mem | awk '{print $4}'");
303 let disk_total = run_cmd("df -h / | tail -1 | awk '{print $2}'");
304 let disk_free = run_cmd("df -h / | tail -1 | awk '{print $4}'");
305 let disk_pct_str = run_cmd("df / | tail -1 | awk '{print $5}'");
306 let disk_used_percent = disk_pct_str.replace('%', "");
307 let disk_used_percent_numeric = disk_used_percent.parse::<f64>().unwrap_or(0.0);
308 let ram_total_bytes = run_cmd("free -b | grep Mem | awk '{print $2}'")
309 .parse()
310 .unwrap_or(0);
311 let ram_free_bytes = run_cmd("free -b | grep Mem | awk '{print $4}'")
312 .parse()
313 .unwrap_or(0);
314 let disk_total_bytes = run_cmd("df --bytes / | tail -1 | awk '{print $2}'")
315 .parse()
316 .unwrap_or(0);
317 let disk_free_bytes = run_cmd("df --bytes / | tail -1 | awk '{print $4}'")
318 .parse()
319 .unwrap_or(0);
320
321 Self {
322 cpu_model: run_cmd("cat /proc/cpuinfo | grep 'model name' | head -1 | cut -d: -f2"),
323 ram_total,
324 ram_free,
325 disk_total,
326 disk_free,
327 disk_used_percent,
328 uptime: run_cmd("uptime -p"),
329 load_average: run_cmd("uptime | awk -F'load average:' '{print $2}'"),
330 ram_total_bytes,
331 ram_free_bytes,
332 disk_total_bytes,
333 disk_free_bytes,
334 disk_used_percent_numeric,
335 }
336 }
337}
338
339impl HardwareInfo {
340 fn capture() -> Self {
341 let mut accelerators = Vec::new();
342
343 let tpu_count: usize = run_cmd("ls /dev/accel* 2>/dev/null | wc -l")
345 .parse()
346 .unwrap_or(0);
347 if tpu_count > 0 {
348 accelerators.push(AcceleratorInfo {
349 kind: "tpu".into(),
350 count: tpu_count,
351 vendor: Some("google".into()),
352 model: None,
353 });
354 }
355
356 let nvidia_gpu_count: usize = run_cmd("nvidia-smi --list-gpus 2>/dev/null | wc -l")
358 .parse()
359 .unwrap_or(0);
360 if nvidia_gpu_count > 0 {
361 let model = run_cmd(
362 "nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1",
363 );
364 accelerators.push(AcceleratorInfo {
365 kind: "gpu".into(),
366 count: nvidia_gpu_count,
367 vendor: Some("nvidia".into()),
368 model: if model.is_empty() { None } else { Some(model) },
369 });
370 }
371
372 let amd_gpu_count: usize =
374 run_cmd("rocm-smi --showproductname 2>/dev/null | grep -c 'GPU\\['")
375 .parse()
376 .unwrap_or(0);
377 if amd_gpu_count > 0 {
378 accelerators.push(AcceleratorInfo {
379 kind: "gpu".into(),
380 count: amd_gpu_count,
381 vendor: Some("amd".into()),
382 model: None,
383 });
384 }
385
386 if nvidia_gpu_count == 0 && amd_gpu_count == 0 {
388 let dri_count: usize = run_cmd(
389 "ls /dev/dri/render* 2>/dev/null | wc -l",
390 )
391 .parse()
392 .unwrap_or(0);
393 if dri_count > 0 {
394 accelerators.push(AcceleratorInfo {
395 kind: "gpu".into(),
396 count: dri_count,
397 vendor: None,
398 model: Some("drm-render".into()),
399 });
400 }
401 }
402
403 let jax_available =
404 run_cmd("timeout 10 python3 -c 'import jax' 2>/dev/null && echo yes || echo no") == "yes";
405 let jax_version = if jax_available {
406 Some(run_cmd("timeout 10 python3 -c 'import jax; print(jax.__version__)'"))
407 } else {
408 None
409 };
410 let jax_device_count = if jax_available {
411 run_cmd("timeout 10 python3 -c 'import jax; print(len(jax.devices()))'")
412 .parse()
413 .ok()
414 } else {
415 None
416 };
417
418 let total_tpu = accelerators
420 .iter()
421 .filter(|a| a.kind == "tpu")
422 .map(|a| a.count)
423 .sum();
424 let total_gpu = accelerators
425 .iter()
426 .filter(|a| a.kind == "gpu")
427 .map(|a| a.count)
428 .sum();
429
430 Self {
431 accelerators,
432 jax_available,
433 jax_version,
434 jax_device_count,
435 tpu_devices: total_tpu,
436 gpu_devices: total_gpu,
437 }
438 }
439}
440
441impl ServiceInfo {
442 fn capture() -> Self {
443 let mut detected = Vec::new();
444
445 let listening = parse_listening_ports();
447
448 for &port in &listening {
449 if let Some(svc) = detect_service_for_port(port) {
450 if !detected.iter().any(|s: &DetectedService| s.name == svc.name) {
452 detected.push(svc);
453 }
454 }
455 }
456
457 let vllm_version_str = detected
459 .iter()
460 .find(|s| s.name == "vllm")
461 .and_then(|s| s.version.clone());
462 let vllm_running = detected.iter().any(|s| s.name == "vllm" && s.running);
463 let vllm_port_bound = detected
464 .iter()
465 .find(|s| s.name == "vllm")
466 .map(|s| s.ports.contains(&8200))
467 .unwrap_or(false);
468
469 Self {
470 detected_services: detected,
471 vllm_version: vllm_version_str,
472 vllm_running,
473 vllm_port_bound,
474 }
475 }
476}
477
478fn parse_listening_ports() -> Vec<u16> {
480 let output = run_cmd("ss -ltnp 2>/dev/null");
481 let mut result = Vec::new();
482
483 for line in output.lines().skip(1) {
484 let parts: Vec<&str> = line.split_whitespace().collect();
485 if parts.len() < 5 {
486 continue;
487 }
488
489 let addr_port = parts[4];
490 let port = match addr_port.rsplit(':').next().and_then(|p| p.parse::<u16>().ok()) {
491 Some(p) => p,
492 None => continue,
493 };
494
495 result.push(port);
496 }
497
498 result
499}
500
501fn detect_service_for_port(port: u16) -> Option<DetectedService> {
504 match port {
505 22 => Some(DetectedService {
506 name: "ssh".into(),
507 version: run_cmd("sshd -V 2>&1 | head -1").into(),
508 running: true,
509 ports: vec![22],
510 }),
511 80 | 443 => Some(DetectedService {
512 name: "nginx".into(),
513 version: detect_version("nginx -v 2>&1 | grep -oP 'nginx/\\K[0-9.]+'"),
514 running: true,
515 ports: vec![port],
516 }),
517 3306 => Some(DetectedService {
518 name: "mysql".into(),
519 version: detect_version("mysql --version 2>/dev/null | grep -oP '[0-9]+\\.[0-9]+\\.[0-9]+'"),
520 running: true,
521 ports: vec![3306],
522 }),
523 5432 => Some(DetectedService {
524 name: "postgres".into(),
525 version: detect_version("postgres --version 2>/dev/null | grep -oP '[0-9]+\\.[0-9]+'"),
526 running: true,
527 ports: vec![5432],
528 }),
529 6379 => Some(DetectedService {
530 name: "redis".into(),
531 version: detect_version("redis-server --version 2>/dev/null | grep -oP 'v=[0-9]+\\.[0-9]+\\.[0-9]+'"),
532 running: true,
533 ports: vec![6379],
534 }),
535 27017 => Some(DetectedService {
536 name: "mongodb".into(),
537 version: detect_version("mongod --version 2>/dev/null | grep -oP '[0-9]+\\.[0-9]+\\.[0-9]+'"),
538 running: true,
539 ports: vec![27017],
540 }),
541 _ => None,
542 }
543}
544
545fn detect_version(cmd: &str) -> Option<String> {
547 let v = run_cmd(cmd);
548 if v.is_empty() { None } else { Some(v) }
549}
550
551impl NetworkInfo {
552 fn capture() -> Self {
553 let public_ip = run_cmd("curl -s --connect-timeout 5 --max-time 5 ifconfig.me 2>/dev/null || echo 'unknown'");
554 let tunnel_output = run_cmd("pgrep -fa cloudflared");
555 let tunnel_running = !tunnel_output.is_empty();
556 let tunnel_name = if tunnel_running {
557 Some(tunnel_output)
558 } else {
559 None
560 };
561
562 Self {
563 public_ip,
564 tunnel_running,
565 tunnel_name,
566 }
567 }
568}
569
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a real capture and checks that the basic fields are populated.
    /// This executes the actual probe commands, so it depends on the host it runs on.
    #[test]
    fn test_telemetry_capture() {
        let telemetry = Telemetry::capture();
        assert!(telemetry.timestamp > 0, "timestamp must be positive");

        let s = &telemetry.system;
        assert!(!s.cpu_model.is_empty(), "cpu_model must not be empty");
        assert!(s.ram_total_bytes > 0, "ram_total_bytes must be > 0");
        assert!(!s.ram_total.is_empty(), "ram_total must not be empty");
        assert!(!s.disk_total.is_empty(), "disk_total must not be empty");

        // Accelerators are optional, but any entry that exists must be well-formed.
        let h = &telemetry.hardware;
        assert!(
            h.accelerators.iter().all(|a| !a.kind.is_empty()),
            "accelerator kind must not be empty"
        );
        assert!(
            h.accelerators.iter().all(|a| a.count > 0),
            "accelerator count must be > 0"
        );

        let svc = &telemetry.services;
        assert!(
            svc.detected_services.iter().all(|s| !s.name.is_empty()),
            "service name must not be empty"
        );

        let net = &telemetry.network;
        assert!(!net.public_ip.is_empty(), "public_ip must not be empty");
    }

    /// Two back-to-back captures should return the same snapshot, identified here by an
    /// equal timestamp.
    #[test]
    fn test_telemetry_cache_works() {
        let t1 = Telemetry::capture();
        let t2 = Telemetry::capture();
        assert_eq!(t1.timestamp, t2.timestamp, "cached telemetry should be identical");
    }

    /// Sums accelerator counts per kind from a hand-built `HardwareInfo`.
    /// The totals are recomputed locally; `hw.tpu_devices` / `hw.gpu_devices` are not read.
    #[test]
    fn test_accelerators_back_compat() {
        let hw = HardwareInfo {
            accelerators: vec![
                AcceleratorInfo {
                    kind: "gpu".into(),
                    count: 4,
                    vendor: Some("nvidia".into()),
                    model: Some("A100".into()),
                },
                AcceleratorInfo {
                    kind: "tpu".into(),
                    count: 8,
                    vendor: Some("google".into()),
                    model: None,
                },
            ],
            jax_available: false,
            jax_version: None,
            jax_device_count: None,
            tpu_devices: 0,
            gpu_devices: 0,
        };

        let total_tpu: usize = hw
            .accelerators
            .iter()
            .filter(|a| a.kind == "tpu")
            .map(|a| a.count)
            .sum();
        let total_gpu: usize = hw
            .accelerators
            .iter()
            .filter(|a| a.kind == "gpu")
            .map(|a| a.count)
            .sum();

        assert_eq!(total_tpu, 8, "back-compat tpu_devices should be 8");
        assert_eq!(total_gpu, 4, "back-compat gpu_devices should be 4");
    }

    /// A `HardwareInfo` with no accelerators is a valid value.
    #[test]
    fn test_accelerators_empty_is_valid() {
        let hw = HardwareInfo {
            accelerators: vec![],
            jax_available: false,
            jax_version: None,
            jax_device_count: None,
            tpu_devices: 0,
            gpu_devices: 0,
        };

        assert!(hw.accelerators.is_empty());
        assert_eq!(hw.tpu_devices, 0);
        assert_eq!(hw.gpu_devices, 0);
    }

    /// Checks the fields of a hand-built `"vllm"` entry in `detected_services`.
    /// The flattened `vllm_*` fields are set to their empty values and are not asserted on.
    #[test]
    fn test_service_back_compat() {
        let svc = ServiceInfo {
            detected_services: vec![DetectedService {
                name: "vllm".into(),
                version: Some("0.6.0".into()),
                running: true,
                ports: vec![8200],
            }],
            vllm_version: None,
            vllm_running: false,
            vllm_port_bound: false,
        };

        let vllm = &svc.detected_services[0];
        assert_eq!(vllm.name, "vllm");
        assert_eq!(vllm.version.as_deref(), Some("0.6.0"));
        assert!(vllm.running);
        assert_eq!(vllm.ports, vec![8200]);
    }

    /// A `ServiceInfo` with no detected services is a valid value.
    #[test]
    fn test_services_empty_is_valid() {
        let svc = ServiceInfo {
            detected_services: vec![],
            vllm_version: None,
            vllm_running: false,
            vllm_port_bound: false,
        };

        assert!(svc.detected_services.is_empty());
    }

    /// `HardwareInfo` and `ServiceInfo` survive a JSON serialize/deserialize round trip.
    #[test]
    fn test_telemetry_serialization_roundtrip() {
        let hw = HardwareInfo {
            accelerators: vec![
                AcceleratorInfo {
                    kind: "gpu".into(),
                    count: 2,
                    vendor: Some("nvidia".into()),
                    model: Some("H100".into()),
                },
            ],
            jax_available: true,
            jax_version: Some("0.4.30".into()),
            jax_device_count: Some(2),
            tpu_devices: 0,
            gpu_devices: 2,
        };

        let svc = ServiceInfo {
            detected_services: vec![DetectedService {
                name: "docker".into(),
                version: Some("26.0.0".into()),
                running: true,
                ports: vec![],
            }],
            vllm_version: None,
            vllm_running: false,
            vllm_port_bound: false,
        };

        let json = serde_json::to_string(&hw).unwrap();
        let parsed: HardwareInfo = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.accelerators.len(), 1);
        assert_eq!(parsed.accelerators[0].kind, "gpu");
        assert_eq!(parsed.accelerators[0].model.as_deref(), Some("H100"));

        let json = serde_json::to_string(&svc).unwrap();
        let parsed: ServiceInfo = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.detected_services.len(), 1);
        assert_eq!(parsed.detected_services[0].name, "docker");
    }

    /// A payload that has only the flattened counters and JAX fields (no `accelerators`
    /// key) still deserializes, with `accelerators` defaulting to empty.
    #[test]
    fn test_telemetry_deserialize_old_wal_event() {
        let old_json = r#"{
            "tpu_devices": 8,
            "gpu_devices": 4,
            "jax_available": true,
            "jax_version": "0.4.25",
            "jax_device_count": 8
        }"#;

        let parsed: HardwareInfo = serde_json::from_str(old_json).unwrap();
        assert_eq!(parsed.tpu_devices, 8);
        assert_eq!(parsed.gpu_devices, 4);
        assert!(parsed.accelerators.is_empty(),
            "old WAL events deserialize with empty accelerators (backwards compat)");
        assert!(parsed.jax_available);
    }
}