runtimo_core/telemetry.rs
1//! System Telemetry — Via Negativa: raw observation, no interpretation.
2//!
3//! Captures a snapshot of the host machine by reading `/proc` and `/sys`
4//! directly. Every field is backed by a raw kernel filesystem read — no
5//! shell-out for data available in `/proc`, no pgrep, no service name
6//! guessing, no version detection.
7//!
8//! # Via Negativa Philosophy
9//!
10//! This module removes everything that is not direct observation:
11//!
12//! - **No pgrep** — tunnel detection reads `/proc/[0-9]*/comm` files
13//! (process names, not command lines). The observer no longer matches
14//! its own shell command as a running `cloudflared` process.
15//! - **No service guessing** — port detection reads `/proc/net/tcp` and
16//! `/proc/net/tcp6` directly, returning raw `Vec<u16>`. Port 22 is
17//! just `22` — the consumer decides it is SSH.
18//! - **No `ss -ltnp` parsing** — eliminated >50 lines of fragile
19//! positional output parsing.
20//! - **No version detection** — no `sshd -V`, `nginx -v`, etc.
21//! - **Raw /proc reads** — cpuinfo, meminfo, uptime, loadavg, net/tcp.
22//! - **Shell-out only where no `/proc` equivalent exists** — `df` for
23//! disk, `curl` for public IP (opt-in), accelerator detection.
24//!
25//! # Example
26//!
27//! ```rust,ignore
28//! use runtimo_core::Telemetry;
29//!
30//! let tel = Telemetry::capture();
31//! tel.print_report();
32//! ```
33//!
34//! # Performance
35//!
36//! Results are cached for 30 seconds via [`TELEMETRY_CACHE`] to avoid
37//! repeated `/proc` reads on consecutive calls.
38
39use crate::cmd::run_cmd;
40use serde::{Deserialize, Serialize};
41use std::sync::Mutex;
42
43static TELEMETRY_CACHE: Mutex<Option<(Telemetry, std::time::Instant)>> = Mutex::new(None);
44const CACHE_TTL_SECS: u64 = 30;
45
46/// Full system telemetry snapshot.
47///
48/// Contains three sub-structures: [`SystemInfo`], [`HardwareInfo`],
49/// and [`NetworkInfo`], plus a Unix timestamp. Service detection has been
50/// removed in favor of raw listening ports in [`NetworkInfo`].
51#[derive(Debug, Clone, Serialize, Deserialize)]
52#[allow(clippy::exhaustive_structs)]
53pub struct Telemetry {
54 /// Unix timestamp (seconds) when the snapshot was taken.
55 pub timestamp: u64,
56 /// Basic system information (CPU, RAM, disk, uptime, load).
57 pub system: SystemInfo,
58 /// Special hardware devices (TPU, GPU, JAX availability).
59 pub hardware: HardwareInfo,
60 /// Network state (public IP, tunnel status, listening ports).
61 pub network: NetworkInfo,
62}
63
64/// Basic system information — direct `/proc` reads only.
65///
66/// No shell commands are used for data available in `/proc`. Disk
67/// information (`df`) is the only exception because Linux provides
68/// no per-mount usage summary in `/proc`.
69#[derive(Debug, Clone, Serialize, Deserialize)]
70#[allow(clippy::exhaustive_structs)]
71pub struct SystemInfo {
72 /// CPU model string from `/proc/cpuinfo` `model name` field.
73 pub cpu_model: String,
74 /// Logical CPU core count from `/proc/cpuinfo` (counts `processor` entries).
75 pub cpu_count: u32,
76 /// Total RAM in human-readable form (e.g. `"32Gi"`) from `/proc/meminfo`
77 /// `MemTotal` (kB → human).
78 pub ram_total: String,
79 /// Free RAM in human-readable form (e.g. `"750Mi"`) from `/proc/meminfo`
80 /// `MemFree` (kB → human).
81 pub ram_free: String,
82 /// Available RAM in human-readable form (e.g. `"22Gi"`) from `/proc/meminfo`
83 /// `MemAvailable` (kB → human). This is the memory usable for new
84 /// allocations without swapping — more useful than `ram_free` for
85 /// capacity planning.
86 pub ram_available: String,
87 /// Total disk space in human-readable form (e.g. `"100G"`) from `df -h /`.
88 pub disk_total: String,
89 /// Free disk space in human-readable form from `df -h /`.
90 pub disk_free: String,
91 /// Disk usage percentage as a string without `%` sign (e.g. `"45"`).
92 pub disk_used_percent: String,
93 /// Human-readable uptime (e.g. `"up 6 days, 3 hours"`) computed from
94 /// `/proc/uptime`.
95 pub uptime: String,
96 /// Machine-parseable uptime in seconds from `/proc/uptime` first field.
97 pub uptime_seconds: u64,
98 /// Load average string (e.g. `"0.50, 0.30, 0.20"`) from `/proc/loadavg`
99 /// first three fields.
100 pub load_average: String,
101}
102
103/// Special hardware device information.
104///
105/// Detects accelerators generically — GPUs (nvidia-smi, rocm-smi, /dev/dri),
106/// TPUs (/dev/accel*), and JAX availability. Reports what exists, not what
107/// was expected. Shell commands are used here because accelerator detection
108/// requires vendor-specific tools that have no `/proc` equivalent.
109#[derive(Debug, Clone, Serialize, Deserialize)]
110#[allow(clippy::exhaustive_structs)]
111pub struct HardwareInfo {
112 /// Detected accelerator devices (any kind). Empty vec = no accelerators found.
113 #[serde(default)]
114 pub accelerators: Vec<AcceleratorInfo>,
115 /// Whether the `jax` Python package is importable.
116 #[serde(default)]
117 pub jax_available: bool,
118 /// JAX version string (e.g. `"0.4.25"`), if available.
119 #[serde(default)]
120 pub jax_version: Option<String>,
121 /// Number of JAX-visible devices, if available.
122 #[serde(default)]
123 pub jax_device_count: Option<usize>,
124}
125
126/// A detected hardware accelerator.
127#[derive(Debug, Clone, Serialize, Deserialize)]
128#[allow(clippy::exhaustive_structs)]
129pub struct AcceleratorInfo {
130 /// Accelerator kind: "gpu", "tpu", "npu".
131 pub kind: String,
132 /// Number of devices of this kind detected.
133 pub count: usize,
134 /// Vendor name if identifiable (e.g. "nvidia", "amd", "google").
135 #[serde(default)]
136 pub vendor: Option<String>,
137 /// Device model string if available.
138 #[serde(default)]
139 pub model: Option<String>,
140}
141
142/// Network state information.
143///
144/// Public IP capture is **opt-in** via `RUNTIMO_ENABLE_PUBLIC_IP=1`.
145/// Without this env var, `public_ip` defaults to `"unknown"` to prevent
146/// unintended external network metadata leakage.
147///
148/// Tunnel detection reads `/proc/[0-9]*/comm` files (process names only,
149/// not command lines). This eliminates the self-match bug where `pgrep`
150/// would match the shell that runs `pgrep` itself.
151///
152/// Listening ports are read directly from `/proc/net/tcp` and `/proc/net/tcp6`
153/// — no `ss` shell-out, no service name guessing.
154#[derive(Debug, Clone, Serialize, Deserialize)]
155#[allow(clippy::exhaustive_structs)]
156pub struct NetworkInfo {
157 /// Public IP address (from `ifconfig.me` when `RUNTIMO_ENABLE_PUBLIC_IP=1`),
158 /// or `"unknown"`.
159 pub public_ip: String,
160 /// Whether a `cloudflared` tunnel process is running (detected via
161 /// `/proc/*/comm` content match, not pgrep).
162 pub tunnel_running: bool,
163 /// PID of the `cloudflared` process if found, extracted from the
164 /// `/proc/<pid>` directory name.
165 pub tunnel_pid: Option<u32>,
166 /// Raw listening TCP ports from `/proc/net/tcp` and `/proc/net/tcp6`.
167 /// Only ports in `LISTEN` (state `0A`) state are included.
168 /// Sorted ascending, duplicates removed.
169 #[serde(default)]
170 pub listening_ports: Vec<u16>,
171}
172
173// ── /proc file reading helpers ───────────────────────────────────────────
174
/// Loads a whole `/proc` file into memory as text.
///
/// # Input
///
/// `path` — Absolute path of the file to read (e.g. `"/proc/cpuinfo"`).
///
/// # Output
///
/// `Ok(String)` — The complete file contents, possibly empty.
/// `Err(io::Error)` — The file is missing, unreadable, or the read failed.
///
/// An empty file is a success, not an error (e.g. an empty `tcp6` table in a
/// container). Callers decide how to treat the `Err` case.
fn read_proc_file(path: &str) -> std::io::Result<String> {
    use std::fs;

    let contents = fs::read_to_string(path)?;
    Ok(contents)
}
192
/// Looks up one `/proc/meminfo` entry and returns its value in kB.
///
/// `/proc/meminfo` lines look like `Key: 12345 kB`. The first line that
/// starts with `key` is selected, and its second whitespace-separated token
/// is parsed as a `u64`. Only that first matching line is considered.
///
/// Returns `0` when no line matches, when the matching line has no second
/// token, or when that token is not a valid `u64`.
fn parse_meminfo_kb(data: &str, key: &str) -> u64 {
    for line in data.lines() {
        if !line.starts_with(key) {
            continue;
        }
        // First match decides the result; later lines are never consulted.
        let mut tokens = line.split_whitespace();
        let _label = tokens.next();
        return tokens
            .next()
            .and_then(|raw| raw.parse::<u64>().ok())
            .unwrap_or(0);
    }
    0
}
207
/// Renders a kilobyte count as a short human-readable string.
///
/// The value is expressed in the largest binary unit it reaches, chosen from
/// `Ki`, `Mi` (>= 1024 KiB) and `Gi` (>= 1024 * 1024 KiB). `Gi` is the
/// largest unit used. The number is truncated by integer division, not
/// rounded. The suffix style follows `free -h`: `"16Gi"`, `"750Mi"`,
/// `"512Ki"`.
///
/// # Examples
///
/// - `format_mem_kb(512)` → `"512Ki"`
/// - `format_mem_kb(1536)` → `"1Mi"` (truncated)
/// - `format_mem_kb(768000)` → `"750Mi"`
/// - `format_mem_kb(16777216)` → `"16Gi"`
fn format_mem_kb(kb: u64) -> String {
    const KIB_PER_MIB: u64 = 1_024;
    const KIB_PER_GIB: u64 = 1_024 * 1_024;

    match kb {
        n if n >= KIB_PER_GIB => format!("{}Gi", n / KIB_PER_GIB),
        n if n >= KIB_PER_MIB => format!("{}Mi", n / KIB_PER_MIB),
        n => format!("{}Ki", n),
    }
}
231
/// Turns an uptime in seconds into prose in the style of `uptime -p`.
///
/// The duration is split into whole days, hours and minutes; leftover
/// seconds are dropped. Units with a zero value are omitted, except that
/// minutes are always shown when nothing else would be. So an uptime under
/// one minute reads `"up 0 minutes"`.
///
/// # Examples
///
/// - `format_uptime(60)` → `"up 1 minute"`
/// - `format_uptime(3661)` → `"up 1 hour, 1 minute"`
/// - `format_uptime(526380)` → `"up 6 days, 2 hours, 13 minutes"`
fn format_uptime(total_seconds: u64) -> String {
    /// Renders `value` with `singular`, appending "s" unless the value is 1.
    fn unit(value: u64, singular: &str) -> String {
        let plural = if value == 1 { "" } else { "s" };
        format!("{} {}{}", value, singular, plural)
    }

    let days = total_seconds / 86_400;
    let hours = total_seconds % 86_400 / 3_600;
    let minutes = total_seconds % 3_600 / 60;

    // Days and hours appear only when non-zero.
    let mut segments: Vec<String> = [(days, "day"), (hours, "hour")]
        .iter()
        .filter(|(value, _)| *value > 0)
        .map(|(value, label)| unit(*value, label))
        .collect();

    // Minutes double as the fallback so the result is never just "up ".
    if minutes > 0 || segments.is_empty() {
        segments.push(unit(minutes, "minute"));
    }

    format!("up {}", segments.join(", "))
}
269
270// ── Telemetry capture ────────────────────────────────────────────────────
271
272impl Telemetry {
273 /// Captures a full system telemetry snapshot.
274 ///
275 /// Results are cached for [`CACHE_TTL_SECS`] (30 seconds) to avoid
276 /// repeated filesystem reads on consecutive calls. Network queries
277 /// (public_ip, tunnel) are included in the cached value.
278 pub fn capture() -> Self {
279 let now = std::time::Instant::now();
280 {
281 let cache = TELEMETRY_CACHE.lock().unwrap_or_else(|e| e.into_inner());
282 if let Some((cached, instant)) = cache.as_ref() {
283 if now.duration_since(*instant).as_secs() < CACHE_TTL_SECS {
284 return cached.clone();
285 }
286 }
287 }
288
289 let timestamp = std::time::SystemTime::now()
290 .duration_since(std::time::UNIX_EPOCH)
291 .map_or(0, |d| d.as_secs());
292
293 let telemetry = Self {
294 timestamp,
295 system: SystemInfo::capture(),
296 hardware: HardwareInfo::capture(),
297 network: NetworkInfo::capture(),
298 };
299
300 let mut cache = TELEMETRY_CACHE.lock().unwrap_or_else(|e| e.into_inner());
301 *cache = Some((telemetry.clone(), now));
302 telemetry
303 }
304
305 /// Prints telemetry in a human-readable report to stdout.
306 ///
307 /// Output includes CPU cores, RAM available, machine-parseable uptime
308 /// seconds, contextualized load average (with core count), raw listening
309 /// ports, and tunnel PID.
310 pub fn print_report(&self) {
311 println!("\n{}", "=".repeat(60));
312 println!(" RUNTIMO TELEMETRY [{}]", self.timestamp);
313 println!("{}", "=".repeat(60));
314
315 println!("\n--- SYSTEM ---");
316 println!(
317 " CPU : {} ({} cores)",
318 self.system.cpu_model, self.system.cpu_count
319 );
320 println!(
321 " RAM : {} total, {} free, {} available",
322 self.system.ram_total, self.system.ram_free, self.system.ram_available
323 );
324 println!(
325 " Disk : {} total, {} free ({}% used)",
326 self.system.disk_total, self.system.disk_free, self.system.disk_used_percent
327 );
328 // Machine-parseable uptime: "up 6 days (526380s)"
329 println!(
330 " Uptime: {} ({}s)",
331 self.system.uptime, self.system.uptime_seconds
332 );
333 // Contextualized load: "3.19, 4.93, 7.68 (4 cores)"
334 println!(
335 " Load : {} ({} cores)",
336 self.system.load_average, self.system.cpu_count
337 );
338
339 println!("\n--- HARDWARE ---");
340 if self.hardware.accelerators.is_empty() {
341 println!(" Accelerators: none detected");
342 } else {
343 for acc in &self.hardware.accelerators {
344 println!(
345 " {}: {}x {}{}",
346 acc.kind,
347 acc.count,
348 acc.model.as_deref().unwrap_or("unknown"),
349 acc.vendor
350 .as_ref()
351 .map(|v| format!(" ({})", v))
352 .unwrap_or_default()
353 );
354 }
355 }
356 if self.hardware.jax_available {
357 println!(
358 " JAX: v{} ({} devices)",
359 self.hardware
360 .jax_version
361 .clone()
362 .unwrap_or_else(|| "unknown".into()),
363 self.hardware.jax_device_count.unwrap_or(0)
364 );
365 }
366
367 println!("\n--- NETWORK ---");
368 println!(" Public IP: {}", self.network.public_ip);
369 // Tunnel with PID: "cloudflared (PID 1234)" or "none"
370 if self.network.tunnel_running {
371 println!(
372 " Tunnel: cloudflared (PID {})",
373 self.network
374 .tunnel_pid
375 .map_or_else(|| "?".to_string(), |p| p.to_string())
376 );
377 } else {
378 println!(" Tunnel: none");
379 }
380 if self.network.listening_ports.is_empty() {
381 println!(" Listening ports: none");
382 } else {
383 let ports_str = self
384 .network
385 .listening_ports
386 .iter()
387 .map(|p| p.to_string())
388 .collect::<Vec<_>>()
389 .join(", ");
390 println!(" Listening ports: {}", ports_str);
391 }
392
393 println!("\n{}", "=".repeat(60));
394 }
395}
396
397// ── SystemInfo capture — direct /proc reads ──────────────────────────────
398
399impl SystemInfo {
400 fn capture() -> Self {
401 // /proc/cpuinfo: extract model name and count logical processors
402 let cpuinfo = read_proc_file("/proc/cpuinfo").unwrap_or_default();
403 let cpu_model = cpuinfo
404 .lines()
405 .find(|l| l.starts_with("model name"))
406 .and_then(|l| l.split(':').nth(1))
407 .map_or_else(|| "unknown".to_string(), |s| s.trim().to_string());
408 // Count lines beginning with "processor" — each is a logical core
409 let cpu_count: u32 = cpuinfo
410 .lines()
411 .filter(|l| l.starts_with("processor"))
412 .count()
413 .try_into()
414 .unwrap_or(0);
415
416 // /proc/meminfo: MemTotal, MemFree, MemAvailable (all in kB)
417 let meminfo = read_proc_file("/proc/meminfo").unwrap_or_default();
418 let ram_total = format_mem_kb(parse_meminfo_kb(&meminfo, "MemTotal:"));
419 let ram_free = format_mem_kb(parse_meminfo_kb(&meminfo, "MemFree:"));
420 let ram_available = format_mem_kb(parse_meminfo_kb(&meminfo, "MemAvailable:"));
421
422 // /proc/uptime: first field is uptime in seconds (fractional).
423 // The value is always non-negative; cast truncation is safe.
424 let uptime = read_proc_file("/proc/uptime").unwrap_or_default();
425 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
426 let uptime_seconds: u64 = uptime
427 .split_whitespace()
428 .next()
429 .and_then(|s| s.parse::<f64>().ok())
430 .map_or(0, |f: f64| f as u64);
431 let uptime_str = format_uptime(uptime_seconds);
432
433 // /proc/loadavg: first three fields are 1/5/15 min load averages
434 let loadavg = read_proc_file("/proc/loadavg").unwrap_or_default();
435 let load_average = {
436 // Extract first three whitespace-separated fields from /proc/loadavg
437 let mut fields = loadavg.split_whitespace();
438 match (fields.next(), fields.next(), fields.next()) {
439 (Some(one), Some(five), Some(fifteen)) => {
440 format!("{one}, {five}, {fifteen}")
441 }
442 _ => String::from("unknown"),
443 }
444 };
445
446 // Disk: no /proc equivalent; keep df shell-out
447 let disk_total = run_cmd("df -h / | tail -1 | awk '{print $2}'");
448 let disk_free = run_cmd("df -h / | tail -1 | awk '{print $4}'");
449 let disk_pct_str = run_cmd("df / | tail -1 | awk '{print $5}'");
450 let disk_used_percent = disk_pct_str.replace('%', "");
451
452 Self {
453 cpu_model,
454 cpu_count,
455 ram_total,
456 ram_free,
457 ram_available,
458 disk_total,
459 disk_free,
460 disk_used_percent,
461 uptime: uptime_str,
462 uptime_seconds,
463 load_average,
464 }
465 }
466}
467
468// ── HardwareInfo capture — vendor tools (no /proc equivalent) ────────────
469
470impl HardwareInfo {
471 fn capture() -> Self {
472 let mut accelerators = Vec::new();
473
474 // TPU devices via /dev/accel*
475 let tpu_count: usize = run_cmd("ls /dev/accel* 2>/dev/null | wc -l")
476 .parse()
477 .unwrap_or(0);
478 if tpu_count > 0 {
479 accelerators.push(AcceleratorInfo {
480 kind: "tpu".into(),
481 count: tpu_count,
482 vendor: Some("google".into()),
483 model: None,
484 });
485 }
486
487 // NVIDIA GPUs via nvidia-smi
488 let nvidia_gpu_count: usize = run_cmd("nvidia-smi --list-gpus 2>/dev/null | wc -l")
489 .parse()
490 .unwrap_or(0);
491 if nvidia_gpu_count > 0 {
492 let model =
493 run_cmd("nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1");
494 accelerators.push(AcceleratorInfo {
495 kind: "gpu".into(),
496 count: nvidia_gpu_count,
497 vendor: Some("nvidia".into()),
498 model: if model.is_empty() { None } else { Some(model) },
499 });
500 }
501
502 // AMD GPUs via rocm-smi
503 let amd_gpu_count: usize =
504 run_cmd("rocm-smi --showproductname 2>/dev/null | grep -c 'GPU\\['")
505 .parse()
506 .unwrap_or(0);
507 if amd_gpu_count > 0 {
508 accelerators.push(AcceleratorInfo {
509 kind: "gpu".into(),
510 count: amd_gpu_count,
511 vendor: Some("amd".into()),
512 model: None,
513 });
514 }
515
516 // Generic DRM devices (fallback for any GPU)
517 if nvidia_gpu_count == 0 && amd_gpu_count == 0 {
518 let dri_count: usize = run_cmd("ls /dev/dri/render* 2>/dev/null | wc -l")
519 .parse()
520 .unwrap_or(0);
521 if dri_count > 0 {
522 accelerators.push(AcceleratorInfo {
523 kind: "gpu".into(),
524 count: dri_count,
525 vendor: None,
526 model: Some("drm-render".into()),
527 });
528 }
529 }
530
531 let jax_available =
532 run_cmd("timeout 10 python3 -c 'import jax' 2>/dev/null && echo yes || echo no")
533 == "yes";
534 let jax_version = if jax_available {
535 Some(run_cmd(
536 "timeout 10 python3 -c 'import jax; print(jax.__version__)'",
537 ))
538 } else {
539 None
540 };
541 let jax_device_count = if jax_available {
542 run_cmd("timeout 10 python3 -c 'import jax; print(len(jax.devices()))'")
543 .parse()
544 .ok()
545 } else {
546 None
547 };
548
549 Self {
550 accelerators,
551 jax_available,
552 jax_version,
553 jax_device_count,
554 }
555 }
556}
557
558// ── NetworkInfo capture — /proc for tunnels and ports ────────────────────
559
560impl NetworkInfo {
561 /// Captures network state with opt-in public IP, tunnel detection via
562 /// `/proc/*/comm`, and listening ports from `/proc/net/tcp` + `tcp6`.
563 ///
564 /// Public IP is only queried when `RUNTIMO_ENABLE_PUBLIC_IP=1`. Without it,
565 /// `public_ip` is set to `"unknown"`.
566 ///
567 /// Tunnel detection reads `/proc/[0-9]*/comm` files and checks if any
568 /// contain `"cloudflared"`. The `comm` file holds only the process name
569 /// (max 16 chars), never the command line — this eliminates the self-match
570 /// bug where `pgrep -fa cloudflared` matches its own shell invocation.
571 fn capture() -> Self {
572 let public_ip = if std::env::var("RUNTIMO_ENABLE_PUBLIC_IP").as_deref() == Ok("1") {
573 run_cmd(
574 "curl -s --connect-timeout 5 --max-time 5 ifconfig.me 2>/dev/null || echo 'unknown'",
575 )
576 } else {
577 "unknown".to_string()
578 };
579
580 let (tunnel_running, tunnel_pid) = detect_cloudflared();
581 let listening_ports = read_listening_ports();
582
583 Self {
584 public_ip,
585 tunnel_running,
586 tunnel_pid,
587 listening_ports,
588 }
589 }
590}
591
/// Looks for a running `cloudflared` process by scanning `/proc/[0-9]*/comm`.
///
/// # How it works
///
/// 1. Lists `/proc` and keeps only entries whose names are made up entirely
///    of ASCII digits; these are the PID directories.
/// 2. Reads each PID directory's `comm` file, which holds the process name
///    only, never the command line or its arguments.
/// 3. Returns the first entry whose trimmed `comm` is exactly
///    `"cloudflared"` and whose directory name parses as a `u32` PID.
///
/// Entries that cannot be read (for example a process that exited
/// mid-scan) are skipped.
///
/// # Why `comm`, not `cmdline`
///
/// `/proc/[pid]/cmdline` contains the full null-delimited command line,
/// including arguments such as `--token <value>`. Reading `comm` instead:
/// - never touches potentially sensitive command-line tokens;
/// - avoids the self-match bug: `sh -c pgrep -fa cloudflared` has
///   `cloudflared` in its command line, but its `comm` is `sh` or `pgrep`.
///
/// Returns `(true, Some(pid))` when a match is found, and `(false, None)`
/// otherwise, including when `/proc` cannot be listed.
fn detect_cloudflared() -> (bool, Option<u32>) {
    let Ok(entries) = std::fs::read_dir("/proc") else {
        return (false, None);
    };

    let found = entries.flatten().find_map(|entry| {
        let pid_dir = entry.path();
        let pid_text = pid_dir.file_name()?.to_str()?;
        // PID directories are the ones named with digits only.
        if !pid_text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }

        let comm = std::fs::read_to_string(pid_dir.join("comm")).ok()?;
        if comm.trim() != "cloudflared" {
            return None;
        }

        // A name that does not fit in u32 is skipped and the scan continues.
        pid_text.parse::<u32>().ok()
    });

    (found.is_some(), found)
}
645
646/// Reads listening TCP ports from `/proc/net/tcp` and `/proc/net/tcp6`.
647///
648/// # Format
649///
650/// Each line (after the header) has the format:
651/// ```text
652/// 0: 00000000:0016 00000000:0000 0A ...
653/// ```
654///
655/// - Column 2 (`00000000:0016`) is the local address. The part after the
656/// colon (`0016`) is the port number in hexadecimal.
657/// - Column 4 (`0A`) is the socket state in hexadecimal. `0A` = `LISTEN`.
658///
659/// Only entries with state `0A` (LISTEN) are included. Ports are sorted
660/// ascending and deduplicated.
661///
662/// # Why `/proc/net/tcp`, not `ss -ltnp`
663///
664/// - `/proc/net/tcp` is a kernel-provided procfs file — no subprocess,
665/// no command parsing, no fragile positional output logic.
666/// - `ss -ltnp` requires shell-out, parses variable-width columns, and
667/// may produce output that varies across `iproute2` versions.
668/// - The procfs format is stable kernel ABI.
669fn read_listening_ports() -> Vec<u16> {
670 let mut ports = Vec::new();
671
672 for path in &["/proc/net/tcp", "/proc/net/tcp6"] {
673 let data = read_proc_file(path).unwrap_or_default();
674 // Skip header line (starts with " sl")
675 for line in data.lines().skip(1) {
676 let parts: Vec<&str> = line.split_whitespace().collect();
677 // Minimum columns: sl(0:) + local_address + rem_address + state
678 if parts.len() < 4 {
679 continue;
680 }
681
682 // Column 2 = local_address (e.g. "00000000:0016")
683 // Column 4 = state (e.g. "0A" = LISTEN)
684 // Use .get() for clippy::indexing_slicing compliance
685 if parts.get(3) != Some(&"0A") {
686 continue;
687 }
688
689 // Extract port hex from local_address (portion after ':')
690 if let Some(port_hex) = parts.get(1).and_then(|addr| addr.split(':').nth(1)) {
691 if let Ok(port) = u16::from_str_radix(port_hex, 16) {
692 ports.push(port);
693 }
694 }
695 }
696 }
697
698 ports.sort_unstable();
699 ports.dedup();
700 ports
701}
702
703// ── Tests ────────────────────────────────────────────────────────────────
704
#[cfg(test)]
mod tests {
    //! Unit tests for telemetry capture, the `/proc` parsing helpers and
    //! serde compatibility. The capture tests read the live host, so they
    //! assert invariants (non-empty, sorted, consistent), not exact values.

    use super::*;

    // ── SystemInfo tests ────────────────────────────────────────────

    /// A full capture populates every section with plausible values.
    #[test]
    fn test_telemetry_capture() {
        let telemetry = Telemetry::capture();
        assert!(telemetry.timestamp > 0, "timestamp must be positive");

        let s = &telemetry.system;
        assert!(!s.cpu_model.is_empty(), "cpu_model must not be empty");
        assert!(s.cpu_count > 0, "cpu_count must be > 0");
        assert!(!s.ram_total.is_empty(), "ram_total must not be empty");
        assert!(!s.ram_free.is_empty(), "ram_free must not be empty");
        assert!(
            !s.ram_available.is_empty(),
            "ram_available must not be empty"
        );
        assert!(!s.disk_total.is_empty(), "disk_total must not be empty");
        assert!(s.uptime_seconds > 0, "uptime_seconds must be > 0");
        assert!(!s.load_average.is_empty(), "load_average must not be empty");

        let h = &telemetry.hardware;
        assert!(
            h.accelerators.iter().all(|a| !a.kind.is_empty()),
            "accelerator kind must not be empty"
        );
        assert!(
            h.accelerators.iter().all(|a| a.count > 0),
            "accelerator count must be > 0"
        );

        let net = &telemetry.network;
        assert!(!net.public_ip.is_empty(), "public_ip must not be empty");
        // The "unknown" default only applies when the lookup is not opted
        // into. Asserting it unconditionally would fail in any environment
        // that exports RUNTIMO_ENABLE_PUBLIC_IP=1.
        if std::env::var("RUNTIMO_ENABLE_PUBLIC_IP").as_deref() != Ok("1") {
            assert_eq!(
                net.public_ip, "unknown",
                "public_ip should be 'unknown' by default (opt-in via RUNTIMO_ENABLE_PUBLIC_IP=1)"
            );
        }
        // listening_ports is a Vec — can be empty in container/isolated env
        assert!(
            net.listening_ports.iter().all(|p| *p > 0),
            "all listening ports must be > 0"
        );
    }

    /// Two back-to-back captures return the same cached snapshot.
    #[test]
    fn test_telemetry_cache_works() {
        let t1 = Telemetry::capture();
        let t2 = Telemetry::capture();
        assert_eq!(
            t1.timestamp, t2.timestamp,
            "cached telemetry should be identical"
        );
    }

    /// The `/proc`-backed fields are populated without any shell command.
    #[test]
    fn test_system_info_from_proc() {
        // Verify cpu_count, ram_available, uptime_seconds are populated
        // from /proc reads (not from shell commands that might fail in
        // minimal containers).
        let sys = SystemInfo::capture();
        assert!(sys.cpu_count > 0, "cpu_count from /proc/cpuinfo");
        assert!(
            !sys.ram_available.is_empty(),
            "ram_available from /proc/meminfo MemAvailable"
        );
        assert!(sys.uptime_seconds > 0, "uptime_seconds from /proc/uptime");
        // uptime string should be non-empty and start with "up"
        assert!(
            sys.uptime.starts_with("up "),
            "uptime string should start with 'up ': got '{}'",
            sys.uptime
        );
        // cpu_model should be non-empty
        assert!(
            !sys.cpu_model.is_empty(),
            "cpu_model from /proc/cpuinfo model name"
        );
    }

    /// Detection never self-matches: a reported PID really is `cloudflared`.
    #[test]
    fn test_cloudflared_detection() {
        // The detection reads /proc/*/comm, not pgrep, so it must not find
        // the shell that is running a detection command.
        let (running, pid) = detect_cloudflared();

        // If cloudflared is actually running on this machine, it should be found.
        // But it should NEVER report pid of the detection process itself.
        if running {
            let found_pid = pid.expect("tunnel_running implies tunnel_pid");
            // Verify the PID actually belongs to a cloudflared process. The
            // process may have exited since detection, so a failed read is
            // tolerated.
            let comm_path = format!("/proc/{}/comm", found_pid);
            if let Ok(content) = std::fs::read_to_string(&comm_path) {
                assert_eq!(
                    content.trim(),
                    "cloudflared",
                    "PID {} comm should be 'cloudflared', got '{}'",
                    found_pid,
                    content.trim()
                );
            }
        }
        // Even if not running, the function must return cleanly
        assert!(!running || pid.is_some());
    }

    /// Listening ports come back sorted, unique and non-zero.
    #[test]
    fn test_listening_ports() {
        let ports = read_listening_ports();

        // Verify no duplicate ports
        let mut uniq = ports.clone();
        uniq.dedup();
        assert_eq!(
            ports.len(),
            uniq.len(),
            "listening ports must have no duplicates"
        );

        // Verify ports are sorted
        for w in ports.windows(2) {
            assert!(w[0] <= w[1], "listening ports must be sorted: {:?}", ports);
        }

        // All ports should be valid (1-65535)
        for &p in &ports {
            assert!(p > 0, "port 0 is not a valid listening port");
        }

        // If this runs on a live system, ports is a Vec — it can be empty
        // in isolated containers. That's valid — no asserting on length.
    }

    // ── Helper function tests ────────────────────────────────────────

    /// Unit boundaries and truncation of the memory formatter.
    #[test]
    fn test_format_mem_kb() {
        assert_eq!(format_mem_kb(512), "512Ki");
        assert_eq!(format_mem_kb(1024), "1Mi");
        assert_eq!(format_mem_kb(1536), "1Mi"); // >1024 snaps to Mi
        assert_eq!(format_mem_kb(1048576), "1Gi");
        assert_eq!(format_mem_kb(2097152), "2Gi");
        assert_eq!(format_mem_kb(768000), "750Mi"); // ~750Mi
        // Edge: 0 KB
        assert_eq!(format_mem_kb(0), "0Ki");
    }

    /// Each unit appears when expected and the "up " prefix is always present.
    #[test]
    fn test_format_uptime() {
        assert!(
            format_uptime(0).contains("minute"),
            "zero uptime: {}",
            format_uptime(0)
        );
        assert!(
            format_uptime(60).contains("1 minute"),
            "60s: {}",
            format_uptime(60)
        );
        assert!(
            format_uptime(3600).contains("1 hour"),
            "3600s: {}",
            format_uptime(3600)
        );
        assert!(
            format_uptime(86400).contains("1 day"),
            "86400s: {}",
            format_uptime(86400)
        );
        // All start with "up "
        assert!(
            format_uptime(12345).starts_with("up "),
            "uptime should start with 'up '"
        );
    }

    /// Known keys parse; missing keys and empty input give zero.
    #[test]
    fn test_parse_meminfo_kb() {
        let sample = "MemTotal: 32768000 kB\nMemFree: 8000000 kB\nMemAvailable: 22000000 kB\n";
        assert_eq!(parse_meminfo_kb(sample, "MemTotal:"), 32_768_000);
        assert_eq!(parse_meminfo_kb(sample, "MemFree:"), 8_000_000);
        assert_eq!(parse_meminfo_kb(sample, "MemAvailable:"), 22_000_000);
        // Missing key
        assert_eq!(parse_meminfo_kb(sample, "SwapTotal:"), 0);
        // Empty input
        assert_eq!(parse_meminfo_kb("", "MemTotal:"), 0);
    }

    // ── Backward compatibility tests ─────────────────────────────────

    /// Per-kind totals can be derived from the generic accelerator list.
    #[test]
    fn test_accelerators_back_compat() {
        let hw = HardwareInfo {
            accelerators: vec![
                AcceleratorInfo {
                    kind: "gpu".into(),
                    count: 4,
                    vendor: Some("nvidia".into()),
                    model: Some("A100".into()),
                },
                AcceleratorInfo {
                    kind: "tpu".into(),
                    count: 8,
                    vendor: Some("google".into()),
                    model: None,
                },
            ],
            jax_available: false,
            jax_version: None,
            jax_device_count: None,
        };

        let total_tpu: usize = hw
            .accelerators
            .iter()
            .filter(|a| a.kind == "tpu")
            .map(|a| a.count)
            .sum();
        let total_gpu: usize = hw
            .accelerators
            .iter()
            .filter(|a| a.kind == "gpu")
            .map(|a| a.count)
            .sum();

        assert_eq!(total_tpu, 8, "total tpu should be 8");
        assert_eq!(total_gpu, 4, "total gpu should be 4");
    }

    /// A host without accelerators is represented by an empty list.
    #[test]
    fn test_accelerators_empty_is_valid() {
        let hw = HardwareInfo {
            accelerators: vec![],
            jax_available: false,
            jax_version: None,
            jax_device_count: None,
        };

        assert!(hw.accelerators.is_empty());
    }

    /// Hardware and network info survive a JSON round trip.
    #[test]
    fn test_telemetry_serialization_roundtrip() {
        let hw = HardwareInfo {
            accelerators: vec![AcceleratorInfo {
                kind: "gpu".into(),
                count: 2,
                vendor: Some("nvidia".into()),
                model: Some("H100".into()),
            }],
            jax_available: true,
            jax_version: Some("0.4.30".into()),
            jax_device_count: Some(2),
        };

        let net = NetworkInfo {
            public_ip: "192.0.2.1".into(),
            tunnel_running: false,
            tunnel_pid: None,
            listening_ports: vec![22, 80, 443],
        };

        let json = serde_json::to_string(&hw).unwrap();
        let parsed: HardwareInfo = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.accelerators.len(), 1);
        assert_eq!(parsed.accelerators[0].kind, "gpu");
        assert_eq!(parsed.accelerators[0].model.as_deref(), Some("H100"));

        let json = serde_json::to_string(&net).unwrap();
        let parsed: NetworkInfo = serde_json::from_str(&json).unwrap();
        assert!(parsed.listening_ports.contains(&22));
        assert!(parsed.listening_ports.contains(&443));
        assert!(!parsed.tunnel_running);
        assert!(parsed.tunnel_pid.is_none());
    }

    /// JSON written before `accelerators` existed still deserializes.
    #[test]
    fn test_telemetry_deserialize_old_wal_event() {
        let old_json = r#"{
 "jax_available": true,
 "jax_version": "0.4.25",
 "jax_device_count": 8
 }"#;

        let parsed: HardwareInfo = serde_json::from_str(old_json).unwrap();
        assert!(
            parsed.accelerators.is_empty(),
            "old WAL events deserialize with empty accelerators"
        );
        assert!(parsed.jax_available);
    }

    /// `listening_ports` keeps its contents and order through serde.
    #[test]
    fn test_network_info_listening_ports_roundtrip() {
        // Verify that listening_ports serializes/deserializes correctly
        let net = NetworkInfo {
            public_ip: "unknown".into(),
            tunnel_running: false,
            tunnel_pid: None,
            listening_ports: vec![22, 11434, 3389],
        };

        let json = serde_json::to_string(&net).unwrap();
        let parsed: NetworkInfo = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.listening_ports, vec![22, 11434, 3389]);
        assert!(!parsed.tunnel_running);
    }
}
1018}