kanade_shared/kv.rs
1//! NATS KV bucket name + key helpers (spec §2.3.2).
2//!
3//! NATS KV bucket names must be domain-safe ASCII (a-z, A-Z, 0-9, _, -),
4//! so the spec's dotted names (`script.current`, `script.status`) are
5//! flattened to underscore form here.
6
/// Flattened (underscore) form of the spec's dotted `script.current`
/// bucket name.
pub const BUCKET_SCRIPT_CURRENT: &str = "script_current";
/// Flattened (underscore) form of the spec's dotted `script.status`
/// bucket name.
pub const BUCKET_SCRIPT_STATUS: &str = "script_status";
/// KV bucket name `agents_state`.
/// NOTE(review): presumably per-agent state records — confirm against
/// the code that writes this bucket.
pub const BUCKET_AGENTS_STATE: &str = "agents_state";
/// Agent configuration bucket. Holds [`KEY_AGENT_TARGET_VERSION`] and
/// the layered-config keys ([`KEY_AGENT_CONFIG_GLOBAL`],
/// `groups.<name>`, `pcs.<pc_id>`).
pub const BUCKET_AGENT_CONFIG: &str = "agent_config";
/// KV bucket name `agent_groups`.
/// NOTE(review): presumably agent → group membership — confirm against
/// the code that writes this bucket.
pub const BUCKET_AGENT_GROUPS: &str = "agent_groups";
/// Schedules bucket (JSON store); [`BUCKET_SCHEDULES_YAML`] is the
/// identically keyed YAML companion.
pub const BUCKET_SCHEDULES: &str = "schedules";
13
/// Job catalog (v0.15) — operator-registered Manifests, keyed by
/// `manifest.id`. Schedules and ad-hoc `kanade run --job-id ...` look
/// jobs up here; the wire never round-trips an inline Manifest body
/// through a Schedule again. Editing a job in place therefore also
/// changes what every future fire of an existing schedule deploys.
pub const BUCKET_JOBS: &str = "jobs";
20
/// Parallel "operator source-of-truth YAML" stores keyed identically
/// to `BUCKET_JOBS` / `BUCKET_SCHEDULES`; this constant is the jobs
/// half of the pair, [`BUCKET_SCHEDULES_YAML`] the schedules half. The
/// agent / scheduler / projector all keep reading the JSON KVs above —
/// these buckets exist only so the SPA's YAML editor can round-trip
/// operator comments + script indentation + block-scalar style exactly.
///
/// Population is opportunistic: any `POST` with a
/// `Content-Type: application/yaml` body stores the raw bytes here
/// alongside the parsed JSON; JSON-content-type POSTs fall back to a
/// `serde_yaml` dump so the buckets stay in lockstep with the JSON
/// store (the operator just loses comments on that path).
pub const BUCKET_JOBS_YAML: &str = "jobs_yaml";
/// YAML companion of [`BUCKET_SCHEDULES`]; populated the same way as
/// [`BUCKET_JOBS_YAML`].
pub const BUCKET_SCHEDULES_YAML: &str = "schedules_yaml";
34
/// KV bucket holding **per-(schedule, pc) last-dispatch marks** for the
/// backend scheduler's in-flight suppression.
///
/// The per-pc / per-target dedup ([`crate::manifest::ExecMode`]) only
/// sees *completed* runs (`execution_results`, exit_code = 0). Since
/// #418 the reconcile poll runs every minute ([`crate::manifest::POLL_CRON`]),
/// but a dispatched Command doesn't land a completion until
/// `jitter (agent-side) + run + outbox drain` later — frequently
/// several minutes with a 3–5 min jitter. Without a dispatch record the
/// poll re-fires the same PC (or whole target) every tick across that
/// gap. This bucket records "I dispatched (schedule, pc) at T" so the
/// scheduler can suppress re-fire for a bounded window without waiting
/// on the completion round-trip.
///
/// Keys are built by [`dispatch_mark_pc_key`] (one per
/// `(schedule, pc)` pair) and [`dispatch_mark_target_key`] (one per
/// schedule). Values are the dispatch instant as an RFC3339 string. A
/// bucket-wide `max_age` GCs marks once they're well past any
/// suppression window, so the bucket can't grow unbounded; the
/// suppression-window check itself lives in
/// `scheduler::policy::suppress_dispatched`.
pub const BUCKET_SCHEDULER_DISPATCH: &str = "scheduler_dispatch";
54
/// Builds the dispatch-mark key for a single `(schedule_id, pc_id)`
/// pair (OncePerPc).
///
/// Layout: `pc.<schedule_id len>.<schedule_id>.<pc_id len>.<pc_id>`.
///
/// The leading `pc.` kind tag keeps these keys apart from the `target.`
/// keys, and every component is **length-prefixed** (`<len>.<value>`),
/// which makes the encoding injective: two distinct
/// `(schedule_id, pc_id)` pairs can never produce the same key, even
/// when an id contains the `.` separator itself. For example
/// `("a.b", "c")` → `pc.3.a.b.1.c`, whereas `("a", "b.c")` →
/// `pc.1.a.3.b.c`.
///
/// Percent- or base64-encoding the ids isn't an option: NATS KV keys
/// only allow `[-/_=.a-zA-Z0-9]`, so a self-delimiting length prefix is
/// the cheapest injective encoding that stays inside that charset.
pub fn dispatch_mark_pc_key(schedule_id: &str, pc_id: &str) -> String {
    // `str::len` is the UTF-8 byte length of each component.
    let schedule_len = schedule_id.len();
    let pc_len = pc_id.len();
    format!("pc.{schedule_len}.{schedule_id}.{pc_len}.{pc_id}")
}
74
/// Builds the whole-target dispatch-mark key (OncePerTarget):
/// `target.<schedule_id len>.<schedule_id>`.
///
/// There is exactly one key per schedule — a per-target fire dispatches
/// the whole target at once, so nothing per-pc needs recording. The
/// length prefix is kept for symmetry with [`dispatch_mark_pc_key`].
pub fn dispatch_mark_target_key(schedule_id: &str) -> String {
    let id_len = schedule_id.len();
    format!("target.{id_len}.{schedule_id}")
}
82
/// Object Store bucket holding raw agent binaries (one object per
/// version, e.g. `0.2.0` → file bytes). [`OBJECT_APP_PACKAGES`]
/// documents why operator-curated packages are kept out of this bucket.
pub const OBJECT_AGENT_RELEASES: &str = "agent_releases";

/// Object Store holding **generic application packages** — anything
/// the agent / kitting scripts pull down + install on endpoints.
/// First consumer is the kanade-client app, but the bucket is
/// intentionally generic: third-party installers (Webex, Teams,
/// custom MSI bundles), upgrade scripts, configuration archives,
/// etc. all live here.
///
/// Object keys are `<name>/<version>` — the operator picks `<name>`
/// once per package family (e.g. `kanade-client`,
/// `webex-meetings`), then `<version>` per release (e.g.
/// `0.41.0`, `2025.03`). Slashes are explicitly allowed by NATS
/// Object Store key rules; the SPA / CLI / HTTP routes all carry
/// the pair as two path segments.
///
/// Why a separate bucket from [`OBJECT_AGENT_RELEASES`]
/// (`agent_releases`):
/// - `agent_releases` is fleet-critical (the agent's own self-
///   update path). Keeping it small + audited matters.
/// - `app_packages` is operator-curated user-space content. The
///   lifecycle is different (operators add/remove packages
///   freely; agent releases follow the release.yml pipeline).
pub const OBJECT_APP_PACKAGES: &str = "app_packages";
108
/// Object Store holding **manifest script bodies** referenced by
/// `Execute::script_object` (SPEC §2.4.1's alternative to inline
/// `script:` / repo-local `script_file:`). Per yukimemi/kanade
/// issue #210, this is the "Plan B 4-bucket layout" sibling of
/// [`OBJECT_APP_PACKAGES`] (`app_packages`) — separated because
/// scripts have a different lifecycle than installer binaries:
///
/// - Smaller (typically KB-to-low-MB, vs MB-to-hundreds-of-MB
///   installers).
/// - Coupled to manifest versions (script lifecycle = manifest
///   lifecycle; the `script_current` / `script_status` KV gates
///   in SPEC §2.6.2 already track manifest versions, so a
///   matching dedicated bucket keeps the audit story aligned).
/// - Different access pattern (every Command execute potentially
///   fetches, whereas an installer is fetched once per fleet deploy).
///
/// Object keys follow the same `<name>/<version>` shape as
/// `app_packages` so the SPA / operator tooling stays uniform.
/// For manifest-driven scripts `<name>` is the manifest id and
/// `<version>` is the manifest version, but the bucket itself
/// imposes no semantics on the pair — operator-uploaded
/// ad-hoc scripts can use any `<name>/<version>` they like.
pub const OBJECT_SCRIPTS: &str = "scripts";
132
/// Object Store holding **overflow stdout / stderr blobs** for the
/// `ExecResult` wire kind (#227). The default NATS `max_payload` is
/// 1 MB; a result whose stdout / stderr exceeds it would have its
/// publish rejected and pin the agent's outbox in a reconnect loop.
/// The agent uploads any stdout / stderr larger than
/// [`STDOUT_INLINE_THRESHOLD`] (256 KB, picked at 1/4 of the default
/// max_payload so the rest of the ExecResult fields fit alongside)
/// into this bucket and replaces the inline field with
/// [`crate::wire::ExecResult::stdout_object`] / `stderr_object`
/// pointers. The backend's results projector derefs the pointers
/// before INSERT so downstream consumers (SQLite, SPA Activity,
/// inventory projector) see the full text the same way they always
/// have.
///
/// Object keys follow the shape `<request_id>/{stdout,stderr}` so
/// stdout + stderr for the same execution share a sibling prefix —
/// makes `kanade jetstream` listings group naturally and keeps the
/// per-key namespace tight against duplicate uploads.
///
/// Per-bucket retention (not a stream-wide TTL since async-nats
/// object_store inherits stream config): matches `STREAM_RESULTS`'s
/// 30-day retention so an operator who can still query the result
/// row in SQLite can also fetch the original blob if the inline
/// copy ever needs re-projection.
pub const OBJECT_RESULT_OUTPUT: &str = "result_output";

/// Inline threshold for `ExecResult.stdout` / `.stderr`. Larger
/// payloads overflow into [`OBJECT_RESULT_OUTPUT`]. The value is
/// `256 * 1024` (262 144), i.e. 256 KB = 1/4 of the NATS default
/// `max_payload` (1 MB), so the rest of the ExecResult JSON
/// (request_id, exec_id, etc.) easily fits below the publish-reject
/// ceiling.
///
/// Lives next to the bucket constant rather than on the agent side
/// so the SPA / future operator tooling can quote the same threshold
/// when explaining "why this result has no inline stdout".
pub const STDOUT_INLINE_THRESHOLD: usize = 256 * 1024;
168
/// Key inside [`BUCKET_AGENT_CONFIG`] carrying the broadcast target
/// version. Agents watch this key and self-update when their running
/// version drifts from it.
pub const KEY_AGENT_TARGET_VERSION: &str = "target_version";

/// Sprint 6 layered-config keys inside [`BUCKET_AGENT_CONFIG`]:
///   * `global`         — fleet-wide default ConfigScope JSON
///   * `groups.<name>`  — per-group override (partial ConfigScope)
///   * `pcs.<pc_id>`    — per-pc override (partial ConfigScope)
///
/// The `groups.` / `pcs.` prefixes let a `kv.keys()` walk pick out
/// just the rows in one scope when listing.
///
/// This constant is the `global` key itself.
pub const KEY_AGENT_CONFIG_GLOBAL: &str = "global";
/// Key prefix for per-group overrides (`groups.<name>`) inside
/// [`BUCKET_AGENT_CONFIG`]. Prepended by [`agent_config_group_key`]
/// and stripped by [`parse_agent_config_group_key`].
pub const PREFIX_AGENT_CONFIG_GROUPS: &str = "groups.";
/// Key prefix for per-pc overrides (`pcs.<pc_id>`) inside
/// [`BUCKET_AGENT_CONFIG`]. Prepended by [`agent_config_pc_key`] and
/// stripped by [`parse_agent_config_pc_key`].
pub const PREFIX_AGENT_CONFIG_PCS: &str = "pcs.";
184
185pub fn agent_config_group_key(group: &str) -> String {
186 format!("{PREFIX_AGENT_CONFIG_GROUPS}{group}")
187}
188
189pub fn agent_config_pc_key(pc_id: &str) -> String {
190 format!("{PREFIX_AGENT_CONFIG_PCS}{pc_id}")
191}
192
193/// Inverse of [`agent_config_group_key`] — returns the bare group
194/// name if `key` carries the groups-scope prefix, else `None`.
195pub fn parse_agent_config_group_key(key: &str) -> Option<&str> {
196 key.strip_prefix(PREFIX_AGENT_CONFIG_GROUPS)
197}
198
199/// Inverse of [`agent_config_pc_key`].
200pub fn parse_agent_config_pc_key(key: &str) -> Option<&str> {
201 key.strip_prefix(PREFIX_AGENT_CONFIG_PCS)
202}
203
/// Script status string `"ACTIVE"`.
/// NOTE(review): presumably the value stored in [`BUCKET_SCRIPT_STATUS`]
/// for a script that may run — confirm against the writers.
pub const SCRIPT_STATUS_ACTIVE: &str = "ACTIVE";
/// Script status string `"REVOKED"`, the counterpart of
/// [`SCRIPT_STATUS_ACTIVE`].
/// NOTE(review): presumably marks a script that must no longer run —
/// confirm against the readers.
pub const SCRIPT_STATUS_REVOKED: &str = "REVOKED";
206
/// Stream name `INVENTORY`.
/// NOTE(review): presumably the JetStream stream carrying inventory
/// reports — confirm against the stream setup code.
pub const STREAM_INVENTORY: &str = "INVENTORY";
/// Stream name `RESULTS`. [`OBJECT_RESULT_OUTPUT`] documents a 30-day
/// retention for this stream.
pub const STREAM_RESULTS: &str = "RESULTS";
/// Stream name `EXEC`.
/// NOTE(review): presumably carries dispatched Commands — confirm
/// against the stream setup code.
pub const STREAM_EXEC: &str = "EXEC";
/// Stream for in-flight script lifecycle events. Not to be confused
/// with [`STREAM_OBS_EVENTS`], which carries the per-PC observability
/// timeline.
pub const STREAM_EVENTS: &str = "EVENTS";
/// Stream name `AUDIT`.
/// NOTE(review): presumably the audit trail — confirm against the
/// stream setup code.
pub const STREAM_AUDIT: &str = "AUDIT";

/// JetStream stream backing the per-PC observability event pipeline
/// (Issue #246). Distinct from [`STREAM_EVENTS`] (in-flight script
/// lifecycle) — `STREAM_OBS_EVENTS` carries the timeline data the
/// SPA's Events page consumes: sign-in/out, power on/off, sleep/
/// resume, agent milestones, diagnostic bundle pointers. The agent
/// publishes on `obs.<pc_id>` (see [`crate::subject::obs`]) and
/// this stream catches everything matching [`crate::subject::OBS_FILTER`]
/// so a backend that boots after the agent doesn't miss any
/// already-emitted events.
pub const STREAM_OBS_EVENTS: &str = "OBS_EVENTS";
223
#[cfg(test)]
mod tests {
    use super::*;

    /// NATS KV bucket names must be domain-safe ASCII (a-z, A-Z, 0-9, _, -).
    /// Pinning every constant here means a later edit that sneaks in a `.`
    /// fails this test instead of silently breaking create_key_value on
    /// the broker side.
    #[test]
    fn bucket_names_are_domain_safe() {
        let all_buckets = [
            BUCKET_SCRIPT_CURRENT,
            BUCKET_SCRIPT_STATUS,
            BUCKET_AGENTS_STATE,
            BUCKET_AGENT_CONFIG,
            BUCKET_AGENT_GROUPS,
            BUCKET_SCHEDULES,
            BUCKET_JOBS,
            BUCKET_JOBS_YAML,
            BUCKET_SCHEDULES_YAML,
            BUCKET_SCHEDULER_DISPATCH,
            OBJECT_AGENT_RELEASES,
            OBJECT_APP_PACKAGES,
            OBJECT_SCRIPTS,
            OBJECT_RESULT_OUTPUT,
        ];
        for name in all_buckets {
            // Checked ahead of the charset rule so a dot gets its own,
            // more specific failure message.
            let has_dot = name.contains('.');
            assert!(
                !has_dot,
                "bucket name {name:?} contains a dot, which NATS KV rejects"
            );
            let domain_safe = name
                .chars()
                .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-'));
            assert!(
                domain_safe,
                "bucket name {name:?} has non-domain-safe characters"
            );
        }
    }

    /// No two stream constants may share a name.
    #[test]
    fn stream_names_are_unique() {
        let names = [
            STREAM_INVENTORY,
            STREAM_RESULTS,
            STREAM_EXEC,
            STREAM_EVENTS,
            STREAM_AUDIT,
            STREAM_OBS_EVENTS,
        ];
        // After sorting, duplicates sit next to each other, so the number
        // of distinct names is one more than the number of adjacent pairs
        // that differ.
        let mut sorted = names;
        sorted.sort_unstable();
        let distinct = 1 + sorted.windows(2).filter(|pair| pair[0] != pair[1]).count();
        assert_eq!(
            distinct,
            names.len(),
            "stream constants collide: {names:?}"
        );
    }

    /// The status strings are pinned to their exact spelling.
    #[test]
    fn script_status_strings() {
        let (active, revoked) = (SCRIPT_STATUS_ACTIVE, SCRIPT_STATUS_REVOKED);
        assert_eq!(active, "ACTIVE");
        assert_eq!(revoked, "REVOKED");
        assert_ne!(active, revoked);
    }

    /// The target-version key is pinned to its exact spelling.
    #[test]
    fn key_agent_target_version_constant() {
        let expected = "target_version";
        assert_eq!(KEY_AGENT_TARGET_VERSION, expected);
    }

    #[test]
    fn agent_config_group_key_round_trips() {
        let key = agent_config_group_key("canary");
        assert_eq!(key, "groups.canary");
        let parsed = parse_agent_config_group_key(&key);
        assert_eq!(parsed, Some("canary"));
    }

    #[test]
    fn agent_config_pc_key_round_trips() {
        let key = agent_config_pc_key("PC-01");
        assert_eq!(key, "pcs.PC-01");
        let parsed = parse_agent_config_pc_key(&key);
        assert_eq!(parsed, Some("PC-01"));
    }

    #[test]
    fn dispatch_mark_keys_are_distinct_by_kind() {
        // A whole-target key must never equal a per-pc key: the `pc.` /
        // `target.` kind prefixes keep the two namespaces apart even when
        // the ids look alike.
        let schedule = "collect-winlog-events";
        let pc_key = dispatch_mark_pc_key(schedule, "PC-01");
        let target_key = dispatch_mark_target_key(schedule);
        assert_eq!(pc_key, "pc.21.collect-winlog-events.5.PC-01");
        assert_eq!(target_key, "target.21.collect-winlog-events");
        assert_ne!(pc_key, target_key);
        // Even a schedule literally named "collect-winlog-events.PC-01"
        // cannot collide with the per-pc key above.
        let dotted_target = dispatch_mark_target_key("collect-winlog-events.PC-01");
        assert_ne!(dotted_target, pc_key);
    }

    #[test]
    fn dispatch_mark_pc_key_has_no_dot_collision() {
        // Length-prefixing makes the encoding injective: a dotted
        // schedule_id can't borrow a leading segment from the pc_id (or
        // vice versa) to forge a colliding key. (CodeRabbit / claude #444.)
        let left = dispatch_mark_pc_key("a.b", "c");
        let right = dispatch_mark_pc_key("a", "b.c");
        assert_ne!(left, right);

        let left = dispatch_mark_pc_key("x", "y.z");
        let right = dispatch_mark_pc_key("x.y", "z");
        assert_ne!(left, right);

        // Same components with the roles swapped are distinct as well.
        let forward = dispatch_mark_pc_key("foo", "bar");
        let swapped = dispatch_mark_pc_key("bar", "foo");
        assert_ne!(forward, swapped);
    }

    #[test]
    fn agent_config_scope_keys_do_not_collide() {
        // Belt + braces: a key from one scope must never be parsed as
        // belonging to the other (e.g. a "pcs." key as a group, or a
        // "groups." key as a pc), and the un-prefixed `global` key must
        // parse as neither. Locking the invariant in a test stops a
        // future prefix rename from breaking it.
        assert_ne!(PREFIX_AGENT_CONFIG_GROUPS, PREFIX_AGENT_CONFIG_PCS);

        let pc_scoped = "pcs.someone";
        let group_scoped = "groups.someone";
        assert!(parse_agent_config_group_key(pc_scoped).is_none());
        assert!(parse_agent_config_pc_key(group_scoped).is_none());

        let global = "global";
        assert_eq!(parse_agent_config_group_key(global), None);
        assert_eq!(parse_agent_config_pc_key(global), None);
    }
}