Skip to main content

studio_worker/
sys.rs

1//! Host-system probes: hostname, OS user, VRAM.
2//!
3//! Every probe emits a structured tracing breadcrumb so an operator can
4//! tell from the logs *why* a worker reports the values it does (in
5//! particular, why VRAM came back as `0.0` — was the sysfs tree missing,
6//! present-but-unparseable, or is the worker running on a non-Linux
7//! host?).  Silent `0.0` makes "this worker claims nothing" impossible
8//! to diagnose from logs alone.
9use anyhow::Result;
10use std::path::Path;
11
12pub fn machine_name() -> String {
13    let name = hostname::get()
14        .ok()
15        .and_then(|s| s.into_string().ok())
16        .unwrap_or_else(|| "unknown-host".to_string());
17    tracing::debug!(
18        target: "studio_worker::sys",
19        op = "machine_name",
20        value = %name,
21        "resolved host machine name"
22    );
23    name
24}
25
26pub fn username() -> String {
27    let user = whoami::username();
28    tracing::debug!(
29        target: "studio_worker::sys",
30        op = "username",
31        value = %user,
32        "resolved OS user"
33    );
34    user
35}
36
37/// Detect physical VRAM on the host, in GB.  Returns 0.0 when we can't
38/// probe (no NVIDIA GPU, no driver) — the engine still runs in synthetic
39/// mode for low-end / CI machines.
40///
41/// This intentionally avoids a hard dependency on `nvml-wrapper` because
42/// it brings a heavy NVML build dep that we don't want at the CI layer.
43/// We probe `/proc/driver/nvidia/gpus/*/information` on Linux and just
44/// return 0 elsewhere.
45pub fn detect_vram_gb() -> Result<f32> {
46    #[cfg(target_os = "linux")]
47    let gb = detect_vram_gb_from_sysfs(Path::new("/proc/driver/nvidia/gpus"));
48    #[cfg(not(target_os = "linux"))]
49    let gb = {
50        tracing::info!(
51            target: "studio_worker::sys",
52            op = "probe_vram",
53            source = "unsupported_platform",
54            vram_gb = 0.0,
55            "VRAM probe unsupported on this OS — defaulting to 0 GB"
56        );
57        0.0_f32
58    };
59    Ok(gb)
60}
61
62/// VRAM probe driven by a configurable sysfs root.  Public-in-crate so
63/// the integration tests can exercise both the "missing root" and
64/// "populated root" branches without a real `/proc/driver/nvidia` tree.
65///
66/// Emits exactly one tracing event per call describing the outcome:
67///
68/// - `INFO source="no_nvidia_sysfs"` — `root` is not a directory.  This
69///   is the normal case on CI runners / non-GPU hosts.
70/// - `INFO source="nvidia_sysfs"` — at least one GPU's `information`
71///   file was parseable.  `gpu_count` reflects how many contributed.
72/// - `WARN source="sysfs_unparseable"` — directories were present but
73///   no `Video Memory` line was readable.  This is the surprising case
74///   we want operators to notice (e.g. driver version bump).
75pub fn detect_vram_gb_from_sysfs(root: &Path) -> f32 {
76    let entries = match std::fs::read_dir(root) {
77        Ok(e) => e,
78        Err(_) => {
79            tracing::info!(
80                target: "studio_worker::sys",
81                op = "probe_vram",
82                source = "no_nvidia_sysfs",
83                vram_gb = 0.0,
84                root = %root.display(),
85                "no NVIDIA sysfs tree at probe root — defaulting to 0 GB VRAM"
86            );
87            return 0.0;
88        }
89    };
90
91    let mut total_mib: f64 = 0.0;
92    let mut gpu_count: u32 = 0;
93    let mut parseable: u32 = 0;
94    for entry in entries.flatten() {
95        gpu_count += 1;
96        let info_path = entry.path().join("information");
97        if let Ok(content) = std::fs::read_to_string(&info_path) {
98            let mut found = false;
99            for line in content.lines() {
100                if let Some(rest) = line.trim().strip_prefix("Video Memory:") {
101                    if let Some(mib) = parse_mib(rest) {
102                        total_mib += mib;
103                        found = true;
104                    }
105                }
106            }
107            if found {
108                parseable += 1;
109            }
110        }
111    }
112
113    let vram_gb = (total_mib / 1024.0) as f32;
114    if parseable > 0 {
115        tracing::info!(
116            target: "studio_worker::sys",
117            op = "probe_vram",
118            source = "nvidia_sysfs",
119            vram_gb = vram_gb,
120            gpu_count = parseable,
121            "detected NVIDIA VRAM via sysfs"
122        );
123    } else {
124        tracing::warn!(
125            target: "studio_worker::sys",
126            op = "probe_vram",
127            source = "sysfs_unparseable",
128            vram_gb = 0.0,
129            gpu_count = gpu_count,
130            root = %root.display(),
131            "NVIDIA sysfs entries present but no Video Memory line parsed — driver layout change?"
132        );
133    }
134    vram_gb
135}
136
/// Parse the value part of a `Video Memory:` line into MiB.
///
/// Input looks like `" 24576 MiB"` or `"24576 MB"`: a number, optionally
/// followed by a whitespace-separated unit.  Units are matched
/// case-insensitively and decimal/binary spellings are treated alike:
///
/// - `KiB` / `KB` — divided by 1024
/// - `MiB` / `MB` — taken as-is (also the default when no unit is given)
/// - `GiB` / `GB` — multiplied by 1024
/// - `TiB` / `TB` — multiplied by 1024²
/// - anything else — leniently taken as MiB
///
/// Returns `None` when there is no leading token, when it is not a number,
/// or when the number is negative or non-finite (before or after scaling).
fn parse_mib(s: &str) -> Option<f64> {
    let mut parts = s.split_whitespace();
    let value = parts.next()?.parse::<f64>().ok()?;

    // `f64::from_str` happily accepts "nan", "inf" and negative numbers.
    // None of those is a plausible memory size, and a single NaN would
    // poison the caller's running total, so treat them as unparseable.
    if !value.is_finite() || value < 0.0 {
        return None;
    }

    let unit = parts.next().unwrap_or("MiB");
    let mib = match unit.to_ascii_lowercase().as_str() {
        "kib" | "kb" => value / 1024.0,
        "gib" | "gb" => value * 1024.0,
        "tib" | "tb" => value * 1024.0 * 1024.0,
        // "mib" / "mb", plus the lenient fallback for unrecognised units.
        _ => value,
    };

    // Scaling a huge-but-finite value can overflow to infinity.
    if mib.is_finite() {
        Some(mib)
    } else {
        None
    }
}
149
/// Unit tests for the host probes.  The sysfs tests build a throwaway
/// directory tree with `tempfile` and point the probe at it, so no real
/// NVIDIA driver is required.
#[cfg(test)]
mod tests {
    use super::*;

    /// Create `<root>/<bus>/information` holding `contents`.
    fn write_gpu_info(root: &Path, bus: &str, contents: &str) {
        let gpu = root.join(bus);
        std::fs::create_dir_all(&gpu).unwrap();
        std::fs::write(gpu.join("information"), contents).unwrap();
    }

    #[test]
    fn parse_mib_handles_mib() {
        assert_eq!(parse_mib(" 24576 MiB"), Some(24576.0));
        assert_eq!(parse_mib("12288 MB"), Some(12288.0));
        assert_eq!(parse_mib("24 GiB"), Some(24576.0));
    }

    #[test]
    fn parse_mib_defaults_to_mib_without_unit() {
        assert_eq!(parse_mib("2048"), Some(2048.0));
    }

    #[test]
    fn parse_mib_rejects_missing_or_non_numeric_value() {
        assert_eq!(parse_mib(""), None);
        assert_eq!(parse_mib("   "), None);
        assert_eq!(parse_mib("lots MiB"), None);
    }

    #[test]
    fn machine_name_returns_non_empty() {
        assert!(!machine_name().is_empty());
    }

    #[test]
    fn username_returns_non_empty() {
        assert!(!username().is_empty());
    }

    #[test]
    fn detect_vram_gb_from_sysfs_returns_zero_when_root_missing() {
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("nope");
        assert_eq!(detect_vram_gb_from_sysfs(&missing), 0.0);
    }

    #[test]
    fn detect_vram_gb_from_sysfs_sums_parseable_gpus() {
        let dir = tempfile::tempdir().unwrap();
        for (bus, mib) in [("0000:01:00.0", "12288"), ("0000:02:00.0", "24576")] {
            write_gpu_info(
                dir.path(),
                bus,
                &format!("Model: x\nVideo Memory: {mib} MiB\n"),
            );
        }
        // (12288 + 24576) / 1024 = 36 GiB
        let gb = detect_vram_gb_from_sysfs(dir.path());
        assert!((gb - 36.0).abs() < 1e-3, "got {gb}");
    }

    #[test]
    fn detect_vram_gb_from_sysfs_returns_zero_when_nothing_parses() {
        // GPU directory present, but its information file carries no
        // `Video Memory` line: the "sysfs_unparseable" branch.
        let dir = tempfile::tempdir().unwrap();
        write_gpu_info(dir.path(), "0000:01:00.0", "Model: x\nBus Type: PCIe\n");
        assert_eq!(detect_vram_gb_from_sysfs(dir.path()), 0.0);
    }

    #[test]
    fn detect_vram_gb_from_sysfs_skips_gpus_without_information_file() {
        let dir = tempfile::tempdir().unwrap();
        write_gpu_info(
            dir.path(),
            "0000:01:00.0",
            "Model: x\nVideo Memory: 8192 MiB\n",
        );
        // Second GPU directory exists but has no `information` file at all.
        std::fs::create_dir_all(dir.path().join("0000:02:00.0")).unwrap();
        let gb = detect_vram_gb_from_sysfs(dir.path());
        assert!((gb - 8.0).abs() < 1e-3, "got {gb}");
    }
}