neuronbox_runtime/host/
nvidia.rs1use std::collections::HashMap;
4use std::process::Command;
5
6use super::snapshot::GpuRecord;
7
8#[cfg(all(target_os = "linux", feature = "nvml"))]
9use super::nvml_linux;
10
11#[derive(Debug, Clone)]
13pub struct NvidiaGpuListResult {
14 pub gpus: Option<Vec<GpuRecord>>,
15 pub probe_ok: bool,
17 pub used_nvml: bool,
19}
20
21pub fn query_gpus() -> NvidiaGpuListResult {
22 #[cfg(all(target_os = "linux", feature = "nvml"))]
23 if let Some(snap) = nvml_linux::try_snapshot() {
24 if !snap.gpus.is_empty() {
25 return NvidiaGpuListResult {
26 gpus: Some(snap.gpus),
27 probe_ok: true,
28 used_nvml: true,
29 };
30 }
31 }
32
33 let (gpus, ok) = query_gpus_nvidia_smi();
34 NvidiaGpuListResult {
35 gpus,
36 probe_ok: ok,
37 used_nvml: false,
38 }
39}
40
41fn query_gpus_nvidia_smi() -> (Option<Vec<GpuRecord>>, bool) {
42 let out = match Command::new("nvidia-smi")
43 .args([
44 "--query-gpu=index,name,memory.total,driver_version",
45 "--format=csv,noheader,nounits",
46 ])
47 .output()
48 {
49 Ok(o) => o,
50 Err(_) => return (None, false),
51 };
52
53 if !out.status.success() {
54 return (None, false);
55 }
56
57 let text = String::from_utf8_lossy(&out.stdout);
58 let mut gpus = Vec::new();
59 for line in text.lines() {
60 let line = line.trim();
61 if line.is_empty() {
62 continue;
63 }
64 let parts: Vec<&str> = line.split(',').map(|s| s.trim()).collect();
65 if parts.len() < 3 {
66 continue;
67 }
68 let Ok(index) = parts[0].parse::<u32>() else {
69 continue;
70 };
71 let name = parts[1].to_string();
72 let Ok(memory_total_mb) = parts[2].parse::<u64>() else {
73 continue;
74 };
75 let driver = parts.get(3).map(|s| s.to_string()).unwrap_or_default();
76 gpus.push(GpuRecord {
77 index,
78 name,
79 memory_total_mb,
80 backend: format!("CUDA (driver {driver})"),
81 });
82 }
83
84 if gpus.is_empty() {
85 (None, true)
86 } else {
87 (Some(gpus), true)
88 }
89}
90
91pub fn compute_apps_pid_memory_mb() -> Option<HashMap<u32, u64>> {
93 #[cfg(all(target_os = "linux", feature = "nvml"))]
94 if let Some(snap) = nvml_linux::try_snapshot() {
95 return Some(snap.pid_memory_mb);
96 }
97
98 compute_apps_pid_memory_mb_smi()
99}
100
/// Queries `nvidia-smi` for the GPU memory used by each compute process.
///
/// Returns `None` when `nvidia-smi` cannot be launched or exits with a
/// failure status. Otherwise returns a map from PID to used memory, which may
/// be empty when no compute process is reported.
fn compute_apps_pid_memory_mb_smi() -> Option<HashMap<u32, u64>> {
    let output = Command::new("nvidia-smi")
        .args([
            "--query-compute-apps=pid,used_gpu_memory",
            "--format=csv,noheader,nounits",
        ])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    Some(parse_compute_apps_pid_memory_mb(&stdout))
}

/// Parses `pid, used_gpu_memory` CSV rows into per-PID memory totals.
///
/// `nvidia-smi` prints one row per GPU a process is running on, so the same
/// PID can appear several times; those rows are summed rather than letting
/// the last one win. Rows with fewer than two fields, or whose PID or memory
/// field is not an unsigned integer (for example `[N/A]`), are skipped.
fn parse_compute_apps_pid_memory_mb(csv: &str) -> HashMap<u32, u64> {
    let mut totals: HashMap<u32, u64> = HashMap::new();
    for row in csv.lines() {
        let mut fields = row.split(',').map(str::trim);
        let (Some(pid_field), Some(memory_field)) = (fields.next(), fields.next()) else {
            continue;
        };
        let Ok(pid) = pid_field.parse::<u32>() else {
            continue;
        };
        // `nounits` should already drop the unit; tolerate a trailing `MiB`
        // anyway without allocating a new string.
        let memory_field = memory_field
            .strip_suffix("MiB")
            .unwrap_or(memory_field)
            .trim_end();
        let Ok(mb) = memory_field.parse::<u64>() else {
            continue;
        };
        let total = totals.entry(pid).or_insert(0);
        *total = total.saturating_add(mb);
    }
    totals
}
134
135pub fn compute_apps_display_lines() -> Vec<String> {
137 #[cfg(all(target_os = "linux", feature = "nvml"))]
138 if let Some(snap) = nvml_linux::try_snapshot() {
139 return snap.display_lines;
140 }
141
142 compute_apps_display_lines_smi()
143}
144
/// Runs `nvidia-smi --query-compute-apps` and returns its non-blank output
/// lines, each trimmed of surrounding whitespace.
///
/// Returns an empty vector when `nvidia-smi` cannot be launched or exits
/// with a failure status.
fn compute_apps_display_lines_smi() -> Vec<String> {
    let launched = Command::new("nvidia-smi")
        .args([
            "--query-compute-apps=pid,process_name,used_memory",
            "--format=csv,noheader",
        ])
        .output();
    let Ok(output) = launched else {
        return Vec::new();
    };
    if !output.status.success() {
        return Vec::new();
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    stdout
        .lines()
        .map(str::trim)
        .filter(|row| !row.is_empty())
        .map(str::to_owned)
        .collect()
}