Skip to main content

harn_vm/
harness_system.rs

1//! Host introspection for the `harness.system.*` capability surface.
2//!
3//! The methods here back the read-only `cpu()`, `memory()`, `gpus()`,
4//! `temperature()`, `platform()`, and `processes()` accessors on the
5//! `HarnessSystem` sub-handle (issue #1912 / epic #1765). All values are
6//! returned as `serde_json::Value` shapes that `crate::stdlib::json_to_vm_value`
7//! lifts into dicts/lists for the VM.
8//!
9//! Privacy + cross-platform notes:
10//!
11//! * `processes()` includes the current Harn process unconditionally; its
12//!   direct children are tagged with `is_harn_owned: true` when they appear
//!   in the system snapshot. We deliberately do **not** leak
//!   `command_line` / `environ` / `cwd` for arbitrary host processes — peer
//!   processes carry only pid, name, and the harn-ownership flag, while
//!   harn-owned entries additionally report parent pid, cpu%, and memory
//!   bytes. Hosts that need richer per-process introspection should
//!   reach for their own privileged surface.
18//! * `temperature()` and `gpus()` may return empty / partial data on
19//!   platforms whose `sysinfo` backend doesn't expose those sensors
20//!   (notably Apple Silicon and most containers). Callers must treat the
21//!   fields as best-effort — missing data is conveyed via empty lists or
22//!   `null` field values rather than errors so scripts can degrade
23//!   gracefully (`"if a local GPU is available, prefer local model"`).
24//! * Tagging spawned subprocesses with the active pipeline / session id
25//!   is descoped to a follow-up: it requires plumbing through the
26//!   sandbox spawn path. The current implementation tags only direct
27//!   children of the harn process (parent pid match), which is enough to
28//!   power the emergency-signaling use case in the issue body.
29
30use std::collections::BTreeSet;
31use std::sync::Mutex;
32
33use serde_json::{json, Value};
34use sysinfo::{
35    Components, MemoryRefreshKind, Pid, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System,
36};
37
/// Process-wide registry of pids explicitly marked as harn-owned.
///
/// Subprocess spawners (e.g. the `command_output` path in
/// `stdlib::sandbox`) may register their children here so `processes()`
/// can tag them with `is_harn_owned: true` even after the parent->child
/// link is broken (e.g. detached agents).
///
/// Entries are only ever removed by [`unregister_harn_owned_pid`]; nothing
/// in this module prunes pids of processes that have exited, so spawners
/// are responsible for unregistering once a tracked child is gone.
static HARN_OWNED_PIDS: Mutex<BTreeSet<u32>> = Mutex::new(BTreeSet::new());
44
45/// Register a pid as harn-owned. Idempotent.
46pub fn register_harn_owned_pid(pid: u32) {
47    if let Ok(mut set) = HARN_OWNED_PIDS.lock() {
48        set.insert(pid);
49    }
50}
51
52/// Stop tagging a pid as harn-owned (call when a tracked child exits).
53pub fn unregister_harn_owned_pid(pid: u32) {
54    if let Ok(mut set) = HARN_OWNED_PIDS.lock() {
55        set.remove(&pid);
56    }
57}
58
59fn harn_owned_pids_snapshot() -> BTreeSet<u32> {
60    HARN_OWNED_PIDS
61        .lock()
62        .map(|set| set.clone())
63        .unwrap_or_default()
64}
65
66/// Snapshot of CPU topology. `count` reflects logical cores; `frequency_mhz`
67/// is the first-core frequency reported by the OS (typically the current
68/// frequency; nominal on many platforms).
69pub fn cpu_snapshot() -> Value {
70    let mut sys = System::new_with_specifics(
71        RefreshKind::nothing().with_cpu(
72            sysinfo::CpuRefreshKind::nothing()
73                .with_cpu_usage()
74                .with_frequency(),
75        ),
76    );
77    sys.refresh_cpu_all();
78    let cpus = sys.cpus();
79    let count = cpus.len();
80    let physical_count = System::physical_core_count();
81    let (model, frequency_mhz) = match cpus.first() {
82        Some(cpu) => {
83            let brand = cpu.brand().trim().to_string();
84            (
85                if brand.is_empty() { None } else { Some(brand) },
86                Some(cpu.frequency()),
87            )
88        }
89        None => (None, None),
90    };
91    let cpu_usage = if cpus.is_empty() {
92        None
93    } else {
94        let total: f32 = cpus.iter().map(|c| c.cpu_usage()).sum();
95        Some(total as f64 / cpus.len() as f64)
96    };
97    json!({
98        "count": count,
99        "physical_count": physical_count,
100        "model": model,
101        "frequency_mhz": frequency_mhz,
102        "usage_pct": cpu_usage,
103    })
104}
105
106/// Snapshot of host memory. All sizes are bytes; cross-platform with
107/// graceful zeroes on hosts where a metric is unavailable.
108pub fn memory_snapshot() -> Value {
109    let mut sys = System::new_with_specifics(
110        RefreshKind::nothing().with_memory(MemoryRefreshKind::everything()),
111    );
112    sys.refresh_memory();
113    let total = sys.total_memory();
114    let used = sys.used_memory();
115    let available = sys.available_memory();
116    let total_gb = bytes_to_gb(total);
117    let used_gb = bytes_to_gb(used);
118    let available_gb = bytes_to_gb(available);
119    let pressure = if total == 0 {
120        "unknown"
121    } else {
122        let ratio = used as f64 / total as f64;
123        if ratio >= 0.85 {
124            "high"
125        } else if ratio >= 0.6 {
126            "medium"
127        } else {
128            "low"
129        }
130    };
131    json!({
132        "total_bytes": total,
133        "used_bytes": used,
134        "available_bytes": available,
135        "total_gb": total_gb,
136        "used_gb": used_gb,
137        "available_gb": available_gb,
138        "pressure": pressure,
139    })
140}
141
142/// Resident bytes for the current Harn process.
143pub fn current_process_memory_bytes() -> Option<u64> {
144    let pid = Pid::from_u32(std::process::id());
145    let mut sys = System::new();
146    sys.refresh_processes_specifics(
147        ProcessesToUpdate::Some(&[pid]),
148        false,
149        ProcessRefreshKind::nothing().with_memory(),
150    );
151    sys.process(pid).map(|process| process.memory())
152}
153
154/// Snapshot of attached GPUs. `sysinfo` does not expose GPU details
155/// directly across all platforms; we surface a non-fatal empty list so
156/// scripts can write `if !gpus.is_empty()` portably. Richer detection
157/// (NVML, Metal, OpenCL) is a follow-up tracked in the issue body.
158pub fn gpus_snapshot() -> Value {
159    Value::Array(Vec::new())
160}
161
162/// Snapshot of per-component temperatures (celsius). Returns `null`
163/// fields when the host does not expose a sensor, and an empty list
164/// when no thermal sensors are visible at all — common in containers,
165/// VMs, and on macOS where `sysinfo`'s thermal API has long-standing
166/// gaps.
167pub fn temperature_snapshot() -> Value {
168    let components = Components::new_with_refreshed_list();
169    let mut entries = Vec::new();
170    for component in &components {
171        entries.push(json!({
172            "label": component.label(),
173            "celsius": component.temperature(),
174            "max_celsius": component.max(),
175            "critical_celsius": component.critical(),
176        }));
177    }
178    json!({
179        "components": entries,
180    })
181}
182
183/// Snapshot of the host platform: os, arch, version, kernel.
184pub fn platform_snapshot() -> Value {
185    json!({
186        "os": System::name(),
187        "arch": std::env::consts::ARCH,
188        "version": System::os_version(),
189        "kernel": System::kernel_version(),
190        "long_os_version": System::long_os_version(),
191        "hostname": System::host_name(),
192    })
193}
194
195/// Snapshot of currently visible processes. The current Harn process is
196/// always included. Other processes are listed but with limited
197/// metadata — name, pid, cpu%, memory bytes, and an `is_harn_owned`
198/// flag derived from the parent pid match or the explicit
199/// [`register_harn_owned_pid`] registry. We do not return command line
200/// arguments, environment, or working directory: those can leak
201/// credentials and prompts from peer agents.
202pub fn processes_snapshot() -> Value {
203    let mut sys = System::new();
204    sys.refresh_processes_specifics(
205        ProcessesToUpdate::All,
206        false,
207        ProcessRefreshKind::nothing()
208            .with_cpu()
209            .with_memory()
210            .with_exe(sysinfo::UpdateKind::OnlyIfNotSet),
211    );
212    let our_pid = std::process::id();
213    let our_pid_sys = Pid::from_u32(our_pid);
214    let registry = harn_owned_pids_snapshot();
215
216    let mut entries = Vec::new();
217    for (pid, process) in sys.processes() {
218        let pid_u32 = pid.as_u32();
219        let parent_u32 = process.parent().map(|p| p.as_u32());
220        let is_harn_owned =
221            pid_u32 == our_pid || registry.contains(&pid_u32) || parent_u32 == Some(our_pid);
222        if !is_harn_owned {
223            // Limit per-process detail leakage: peer processes appear
224            // in the list as bare {pid, name} entries. Scripts that
225            // need the broader topology can opt into it via a future
226            // capability extension.
227            entries.push(json!({
228                "pid": pid_u32,
229                "name": process.name().to_string_lossy(),
230                "is_harn_owned": false,
231            }));
232            continue;
233        }
234        entries.push(json!({
235            "pid": pid_u32,
236            "parent_pid": parent_u32,
237            "name": process.name().to_string_lossy(),
238            "cpu_pct": process.cpu_usage(),
239            "mem_bytes": process.memory(),
240            "is_harn_owned": true,
241            "is_self": pid_u32 == our_pid,
242        }));
243    }
244
245    // Stable ordering: harn-owned first, then by pid ascending.
246    entries.sort_by(|a, b| {
247        let a_owned = a
248            .get("is_harn_owned")
249            .and_then(Value::as_bool)
250            .unwrap_or(false);
251        let b_owned = b
252            .get("is_harn_owned")
253            .and_then(Value::as_bool)
254            .unwrap_or(false);
255        b_owned.cmp(&a_owned).then_with(|| {
256            a.get("pid")
257                .and_then(Value::as_u64)
258                .cmp(&b.get("pid").and_then(Value::as_u64))
259        })
260    });
261
262    // sysinfo doesn't always include our own pid before the first
263    // refresh on some platforms (Windows); synthesize an entry so the
264    // contract "processes() always contains the running harn process"
265    // holds even on cold snapshots.
266    if !entries
267        .iter()
268        .any(|entry| entry.get("pid").and_then(Value::as_u64).map(|p| p as u32) == Some(our_pid))
269    {
270        entries.insert(
271            0,
272            json!({
273                "pid": our_pid,
274                "parent_pid": Value::Null,
275                "name": current_process_name(&sys, our_pid_sys),
276                "cpu_pct": 0.0,
277                "mem_bytes": 0,
278                "is_harn_owned": true,
279                "is_self": true,
280            }),
281        );
282    }
283
284    Value::Array(entries)
285}
286
287fn current_process_name(sys: &System, pid: Pid) -> String {
288    sys.process(pid)
289        .map(|process| process.name().to_string_lossy().into_owned())
290        .unwrap_or_else(|| "harn".to_string())
291}
292
/// Converts a byte count to gigabytes using the binary definition
/// (1 GB = 2^30 bytes).
fn bytes_to_gb(bytes: u64) -> f64 {
    const BYTES_PER_GB: f64 = (1u64 << 30) as f64;
    bytes as f64 / BYTES_PER_GB
}
296
#[cfg(test)]
mod tests {
    //! Smoke tests for the snapshot functions. They assert only on shape and
    //! on invariants that should hold on any host, since the actual values
    //! depend on the machine running the tests.

    use super::*;

    /// At least one logical core must be reported.
    #[test]
    fn cpu_snapshot_reports_nonzero_count() {
        let count = cpu_snapshot()["count"].as_u64().expect("count present");
        assert!(count >= 1, "expected at least one logical cpu, got {count}");
    }

    /// Total memory is non-zero and the pressure bucket is a known label.
    #[test]
    fn memory_snapshot_has_nonzero_total() {
        let snapshot = memory_snapshot();

        let total = snapshot["total_bytes"]
            .as_u64()
            .expect("total_bytes present");
        assert!(total > 0, "total memory should be non-zero, got {total}");

        let pressure = snapshot["pressure"].as_str().expect("pressure present");
        let known_bucket = ["low", "medium", "high", "unknown"].contains(&pressure);
        assert!(
            known_bucket,
            "pressure should be a known bucket, got {pressure:?}"
        );
    }

    /// The GPU snapshot is always a list, even when empty.
    #[test]
    fn gpus_snapshot_returns_list() {
        assert!(gpus_snapshot().is_array(), "gpus snapshot is a list");
    }

    /// The temperature snapshot always exposes a `components` list.
    #[test]
    fn temperature_snapshot_returns_components_field() {
        let snapshot = temperature_snapshot();
        let components = snapshot.get("components");
        assert!(components.is_some(), "components field present");
        assert!(
            matches!(components, Some(Value::Array(_))),
            "components is array"
        );
    }

    /// `arch` mirrors the compile-time architecture constant.
    #[test]
    fn platform_snapshot_includes_arch() {
        let snapshot = platform_snapshot();
        let arch = snapshot.get("arch").and_then(Value::as_str);
        assert_eq!(arch, Some(std::env::consts::ARCH));
    }

    /// The running process is always listed and flagged as harn-owned.
    #[test]
    fn processes_snapshot_includes_self() {
        let snapshot = processes_snapshot();
        let our_pid = u64::from(std::process::id());

        let self_entry = snapshot
            .as_array()
            .expect("array")
            .iter()
            .find(|entry| entry["pid"].as_u64() == Some(our_pid))
            .expect("self entry present");

        assert_eq!(
            self_entry["is_harn_owned"].as_bool(),
            Some(true),
            "self entry must be harn-owned"
        );
    }

    /// When a reading is available for our own process it must be non-zero.
    #[test]
    fn current_process_memory_bytes_reports_self_when_available() {
        match current_process_memory_bytes() {
            Some(bytes) => assert!(bytes > 0, "current process memory should be non-zero"),
            None => {}
        }
    }

    /// Registering then unregistering a pid leaves the registry without it.
    #[test]
    fn register_and_unregister_harn_owned_pid_round_trip() {
        // pick a pid that's vanishingly unlikely to collide with self
        let fake = u32::MAX - 1;

        register_harn_owned_pid(fake);
        let after_register = harn_owned_pids_snapshot();
        assert!(after_register.contains(&fake));

        unregister_harn_owned_pid(fake);
        let after_unregister = harn_owned_pids_snapshot();
        assert!(!after_unregister.contains(&fake));
    }
}