Skip to main content

kanade_shared/wire/
heartbeat.rs

1use serde::{Deserialize, Serialize};
2
3/// Liveness ping every agent sends on a 30 s cadence (see
4/// `inventory_interval` / `heartbeat_interval` in agent_config).
5///
6/// `hostname` and `os_family` are enriched baseline facts so the
7/// SPA agents page has *something* to show as soon as the agent
8/// boots — even when the full WMI-driven `HwInventory` hasn't been
9/// (or can't be) collected. Both stay `Option<String>` so older
10/// agents that don't send them still deserialize cleanly.
11#[derive(Serialize, Deserialize, Debug, Clone)]
12pub struct Heartbeat {
13    pub pc_id: String,
14    pub at: chrono::DateTime<chrono::Utc>,
15    pub agent_version: String,
16    #[serde(default, skip_serializing_if = "Option::is_none")]
17    pub hostname: Option<String>,
18    /// Coarse OS bucket from `std::env::consts::OS` — `"windows"`,
19    /// `"linux"`, `"macos"`. Rich OS metadata still flows through
20    /// the inventory path; this is just the "agent is alive on a
21    /// <family>" signal.
22    #[serde(default, skip_serializing_if = "Option::is_none")]
23    pub os_family: Option<String>,
24    // v0.37 / Part 2: agent process self-perf. All Option so older
25    // agents (or any future build that hits a sysinfo error) keep
26    // sending valid heartbeats — backend just shows blanks. Cost on
27    // the agent is one `sysinfo::System::refresh_processes_specifics`
28    // call per 30 s tick. On Windows the underlying APIs are
29    // `CreateToolhelp32Snapshot` + per-process `GetProcessMemoryInfo`
30    // / `GetProcessIoCounters` (NOT WMI; NOT
31    // `NtQuerySystemInformation`). Single-digit ms on a typical
32    // endpoint; scales with the host's process count for the
33    // Toolhelp snapshot — fine on a normal PC, larger on RDS hosts.
34    /// Agent process CPU usage, in percent-of-one-core (a process
35    /// fully pinning one core reports 100; one pinning two cores
36    /// reports 200). This is sysinfo's convention — closer to
37    /// `top` than to Windows Task Manager (which normalises by
38    /// total cores, so a 1-core peg on an 8-core box shows up as
39    /// ~12.5 % in TM). Divide by host core count if you want a
40    /// host-normalised view. `None` is published on the very first
41    /// heartbeat after process start, because sysinfo's CPU% needs
42    /// two consecutive samples to diff — populating it would
43    /// always report 0.0 there and risk an operator misreading
44    /// "agent isn't doing anything".
45    #[serde(default, skip_serializing_if = "Option::is_none")]
46    pub agent_cpu_pct: Option<f64>,
47    /// Agent process resident set size in bytes — sysinfo's
48    /// `Process::memory()`, which on Windows is
49    /// `PROCESS_MEMORY_COUNTERS_EX::WorkingSetSize` (full working
50    /// set, shared + private). Closest Task Manager column is
51    /// "Working set (memory)", NOT "Memory (private working set)"
52    /// which would be `PrivateUsage` and sysinfo exposes
53    /// separately as `virtual_memory()`.
54    #[serde(default, skip_serializing_if = "Option::is_none")]
55    pub agent_rss_bytes: Option<i64>,
56    /// Absolute bytes the agent process has read from disk since
57    /// it started. Wire format is cumulative (not delta) so
58    /// dropped / out-of-order heartbeats don't poison rate math
59    /// for any client that wants to derive a rate by diffing
60    /// successive snapshots. Today neither the backend projector
61    /// nor the SPA does that diff — they just store and render
62    /// the cumulative value. Future SPA work or an exporter can
63    /// compute rate without a schema change.
64    #[serde(default, skip_serializing_if = "Option::is_none")]
65    pub agent_disk_read_bytes: Option<i64>,
66    /// Absolute bytes the agent process has written to disk since
67    /// it started. Same shape as `agent_disk_read_bytes`.
68    #[serde(default, skip_serializing_if = "Option::is_none")]
69    pub agent_disk_written_bytes: Option<i64>,
70    /// #582 Phase 2: versions this agent's boot sentinel rolled back
71    /// after they crash-looped on boot. The self-update path refuses
72    /// to (re-)deploy any version listed here, so the SPA's rollout
73    /// view can flag "PC-X failed to adopt target 0.43.51" — the
74    /// fleet-wide signal that a rollout is bad. Empty (the common
75    /// case) is skipped on the wire; older agents simply omit it and
76    /// `#[serde(default)]` leaves it empty.
77    #[serde(default, skip_serializing_if = "Vec::is_empty")]
78    pub quarantined_versions: Vec<String>,
79    /// Most-recently signed-in account on this host, read from the
80    /// Windows `LogonUI` registry key
81    /// (`HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Authentication\LogonUI\LastLoggedOnUser`).
82    /// This is the `DOMAIN\sam` (or `.\user`) login name the sign-in
83    /// screen last used; it survives logoff, so it's populated even
84    /// when no one is currently signed in. `None` on a never-signed-in
85    /// host and on non-Windows agents (`read_hklm_value` returns `None`
86    /// off-Windows) — see #655 for the cross-platform follow-up — so
87    /// older agents keep sending valid heartbeats either way.
88    #[serde(default, skip_serializing_if = "Option::is_none")]
89    pub last_logon_user: Option<String>,
90    /// Display name paired with [`Self::last_logon_user`], from
91    /// `LogonUI\LastLoggedOnDisplayName` (e.g. `"Yamada Taro"`). `None`
92    /// when unavailable.
93    #[serde(default, skip_serializing_if = "Option::is_none")]
94    pub last_logon_display_name: Option<String>,
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// Fixed timestamp shared by every fixture so the tests are
    /// deterministic.
    fn fixed_at() -> chrono::DateTime<chrono::Utc> {
        chrono::Utc.with_ymd_and_hms(2026, 5, 16, 0, 0, 0).unwrap()
    }

    /// Heartbeat carrying only the three mandatory fields; every
    /// optional field is unset / empty.
    fn bare_heartbeat() -> Heartbeat {
        Heartbeat {
            pc_id: "x".into(),
            at: fixed_at(),
            agent_version: "0.43.50".into(),
            hostname: None,
            os_family: None,
            agent_cpu_pct: None,
            agent_rss_bytes: None,
            agent_disk_read_bytes: None,
            agent_disk_written_bytes: None,
            quarantined_versions: Vec::new(),
            last_logon_user: None,
            last_logon_display_name: None,
        }
    }

    #[test]
    fn heartbeat_round_trips_through_json() {
        let hb = Heartbeat {
            pc_id: "pc-01".into(),
            at: fixed_at(),
            agent_version: "0.12.0".into(),
            hostname: Some("PC-01".into()),
            os_family: Some("windows".into()),
            agent_cpu_pct: Some(0.3),
            agent_rss_bytes: Some(45_000_000),
            agent_disk_read_bytes: Some(1024 * 1024),
            agent_disk_written_bytes: Some(512 * 1024),
            quarantined_versions: vec!["0.43.51".into()],
            last_logon_user: Some("EXAMPLE\\taro".into()),
            last_logon_display_name: Some("Yamada Taro".into()),
        };
        let json = serde_json::to_string(&hb).unwrap();
        let back: Heartbeat = serde_json::from_str(&json).unwrap();
        // Compared field by field so a mismatch names the exact field.
        assert_eq!(back.pc_id, hb.pc_id);
        assert_eq!(back.at, hb.at);
        assert_eq!(back.agent_version, hb.agent_version);
        assert_eq!(back.hostname, hb.hostname);
        assert_eq!(back.os_family, hb.os_family);
        assert_eq!(back.agent_cpu_pct, hb.agent_cpu_pct);
        assert_eq!(back.agent_rss_bytes, hb.agent_rss_bytes);
        assert_eq!(back.agent_disk_read_bytes, hb.agent_disk_read_bytes);
        assert_eq!(back.agent_disk_written_bytes, hb.agent_disk_written_bytes);
        assert_eq!(back.quarantined_versions, hb.quarantined_versions);
        assert_eq!(back.last_logon_user, hb.last_logon_user);
        assert_eq!(back.last_logon_display_name, hb.last_logon_display_name);
    }

    #[test]
    fn heartbeat_empty_quarantine_is_omitted_on_the_wire() {
        let json = serde_json::to_string(&bare_heartbeat()).unwrap();
        assert!(
            !json.contains("quarantined_versions"),
            "empty quarantine must be skipped on the wire: {json}",
        );
        // And a payload without the field still decodes to empty.
        let back: Heartbeat = serde_json::from_str(&json).unwrap();
        assert!(back.quarantined_versions.is_empty());
    }

    #[test]
    fn heartbeat_unset_optionals_are_omitted_on_the_wire() {
        // Every `Option` field carries `skip_serializing_if`, so an
        // all-`None` heartbeat must not emit any of those keys.
        let json = serde_json::to_string(&bare_heartbeat()).unwrap();
        for key in [
            "hostname",
            "os_family",
            "agent_cpu_pct",
            "agent_rss_bytes",
            "agent_disk_read_bytes",
            "agent_disk_written_bytes",
            "last_logon_user",
            "last_logon_display_name",
        ] {
            // Match the quoted key so a value can never trigger a
            // false positive.
            let quoted = format!("\"{key}\"");
            assert!(
                !json.contains(&quoted),
                "unset `{key}` must be skipped on the wire: {json}",
            );
        }
    }

    #[test]
    fn heartbeat_without_enrichment_still_decodes() {
        // Older agents sending only the v0.11 shape must still parse.
        let json = r#"{"pc_id":"x","at":"2026-05-16T00:00:00Z","agent_version":"0.11.5"}"#;
        let hb: Heartbeat = serde_json::from_str(json).unwrap();
        assert_eq!(hb.pc_id, "x");
        assert_eq!(hb.hostname, None);
        assert_eq!(hb.os_family, None);
        // v0.37 Part 2: perf fields are also optional and default
        // to None, so a pre-0.37 agent's heartbeat keeps decoding.
        assert_eq!(hb.agent_cpu_pct, None);
        assert_eq!(hb.agent_rss_bytes, None);
        assert_eq!(hb.agent_disk_read_bytes, None);
        assert_eq!(hb.agent_disk_written_bytes, None);
        // The quarantine list is absent in the old shape as well and
        // must default to empty rather than fail the decode.
        assert!(hb.quarantined_versions.is_empty());
        // last-logon fields are optional too: a heartbeat that omits
        // them (older agent, non-Windows host) decodes to None.
        assert_eq!(hb.last_logon_user, None);
        assert_eq!(hb.last_logon_display_name, None);
    }
}