kanade_shared/kv.rs
1//! NATS KV bucket name + key helpers (spec §2.3.2).
2//!
3//! NATS KV bucket names must be domain-safe ASCII (a-z, A-Z, 0-9, _, -),
4//! so the spec's dotted names (`script.current`, `script.status`) are
5//! flattened to underscore form here.
6
/// Underscore-flattened form of the spec's dotted `script.current`
/// bucket name (bucket names may not contain `.`).
pub const BUCKET_SCRIPT_CURRENT: &str = "script_current";
/// Underscore-flattened form of the spec's dotted `script.status`
/// bucket name.
pub const BUCKET_SCRIPT_STATUS: &str = "script_status";
/// KV bucket name for agent state.
/// NOTE(review): the key / value schema is not visible from this
/// module — confirm against the writers before relying on it.
pub const BUCKET_AGENTS_STATE: &str = "agents_state";
/// KV bucket for agent configuration. Holds
/// [`KEY_AGENT_TARGET_VERSION`] plus the layered-config keys
/// ([`KEY_AGENT_CONFIG_GLOBAL`], `groups.<name>`, `pcs.<pc_id>`).
pub const BUCKET_AGENT_CONFIG: &str = "agent_config";
/// KV bucket name for agent groups.
/// NOTE(review): presumably group membership — contents are not
/// visible from this module, confirm.
pub const BUCKET_AGENT_GROUPS: &str = "agent_groups";
/// KV bucket of schedules (the JSON store); [`BUCKET_SCHEDULES_YAML`]
/// is the parallel operator-YAML store keyed identically.
pub const BUCKET_SCHEDULES: &str = "schedules";
13
14/// Job catalog (v0.15) — operator-registered Manifests, keyed by
15/// `manifest.id`. Schedules and ad-hoc `kanade run --job-id ...` look
16/// jobs up here; the wire never round-trips an inline Manifest body
17/// through a Schedule again. Editing a job in-place retroactively
18/// changes what future schedule fires deploy.
19pub const BUCKET_JOBS: &str = "jobs";
20
21/// Parallel "operator source-of-truth YAML" stores keyed identically
22/// to `BUCKET_JOBS` / `BUCKET_SCHEDULES`. The agent / scheduler /
23/// projector all keep reading the JSON KVs above — these buckets
24/// exist only so the SPA's YAML editor can round-trip operator
25/// comments + script indentation + block-scalar style exactly.
26///
27/// Population is opportunistic: any `POST` with a
28/// `Content-Type: application/yaml` body stores the raw bytes here
29/// alongside the parsed JSON; JSON-content-type POSTs fall back to a
30/// `serde_yaml` dump so the buckets stay in lockstep with the JSON
31/// store (operator just loses comments on that path).
32pub const BUCKET_JOBS_YAML: &str = "jobs_yaml";
33pub const BUCKET_SCHEDULES_YAML: &str = "schedules_yaml";
34
35/// Fleet-wide singleton settings that aren't per-agent (so they don't
36/// belong in `agent_config`'s layered scopes) and aren't per-schedule
37/// (so they don't belong in `schedules`). First and only key so far is
38/// [`KEY_FREEZE`] (#418 Phase 5 global change-freeze). One small bucket
39/// both the backend scheduler and every agent's local scheduler watch.
40pub const BUCKET_FLEET_CONFIG: &str = "fleet_config";
41
42/// Singleton key in [`BUCKET_FLEET_CONFIG`] holding the JSON-encoded
43/// [`crate::manifest::Freeze`]. **Key absent ⇒ not frozen** (clearing
44/// the freeze is a KV delete), so readers treat a missing key as "fire
45/// normally" and only evaluate `Freeze::is_active` when the key exists.
46pub const KEY_FREEZE: &str = "freeze";
47
48/// KV bucket holding **per-(schedule, pc) last-dispatch marks** for the
49/// backend scheduler's in-flight suppression.
50///
51/// The per-pc / per-target dedup ([`crate::manifest::ExecMode`]) only
52/// sees *completed* runs (`execution_results`, exit_code = 0). Since
53/// #418 the reconcile poll runs every minute ([`crate::manifest::POLL_CRON`]),
54/// but a dispatched Command doesn't land a completion until
55/// `jitter (agent-side) + run + outbox drain` later — frequently
56/// several minutes with a 3–5 min jitter. Without a dispatch record the
57/// poll re-fires the same PC (or whole target) every tick across that
58/// gap. This bucket records "I dispatched (schedule, pc) at T" so the
59/// scheduler can suppress re-fire for a bounded window without waiting
60/// on the completion round-trip.
61///
62/// Values are the dispatch instant as an RFC3339 string. A bucket-wide
63/// `max_age` GCs marks once they're well past any suppression window,
64/// so the bucket can't grow unbounded; the suppression-window check
65/// itself lives in `scheduler::policy::suppress_dispatched`.
66pub const BUCKET_SCHEDULER_DISPATCH: &str = "scheduler_dispatch";
67
/// Builds the dispatch-mark key for one `(schedule_id, pc_id)` pair
/// (the OncePerPc case).
///
/// Layout: `pc.<len(schedule_id)>.<schedule_id>.<len(pc_id)>.<pc_id>`,
/// where each `<len>` is the component's byte length in decimal.
///
/// * The leading `pc.` keeps these keys in a different namespace from
///   the `target.` keys of [`dispatch_mark_target_key`].
/// * The length prefixes make the encoding self-delimiting, so two
///   different pairs never produce the same key even if an id itself
///   contains `.`: `("a.b", "c")` → `pc.3.a.b.1.c`, whereas
///   `("a", "b.c")` → `pc.1.a.3.b.c`.
///
/// Escaping the ids (percent- or base64-encoding) is not possible
/// because NATS KV keys are restricted to `[-/_=.a-zA-Z0-9]`; a length
/// prefix is the cheapest collision-free scheme that stays inside that
/// character set. The ids themselves are inserted verbatim.
pub fn dispatch_mark_pc_key(schedule_id: &str, pc_id: &str) -> String {
    let schedule_len = schedule_id.len();
    let pc_len = pc_id.len();
    format!("pc.{schedule_len}.{schedule_id}.{pc_len}.{pc_id}")
}
87
/// Builds the dispatch-mark key for a whole-target fire (the
/// OncePerTarget case).
///
/// Layout: `target.<len(schedule_id)>.<schedule_id>`, with `<len>` the
/// byte length in decimal. A per-target fire dispatches the entire
/// target in one go, so there is exactly one key per schedule and no
/// per-pc component. The length prefix mirrors
/// [`dispatch_mark_pc_key`] so both key kinds share one shape.
pub fn dispatch_mark_target_key(schedule_id: &str) -> String {
    let schedule_len = schedule_id.len();
    format!("target.{schedule_len}.{schedule_id}")
}
95
96/// Object Store bucket holding raw agent binaries (one object per
97/// version, e.g. `0.2.0` → file bytes).
98pub const OBJECT_AGENT_RELEASES: &str = "agent_releases";
99
100/// Object Store holding **generic application packages** — anything
101/// the agent / kitting scripts pull down + install on endpoints.
102/// First consumer is the kanade-client app, but the bucket is
103/// intentionally generic: third-party installers (Webex, Teams,
104/// custom MSI bundles), upgrade scripts, configuration archives,
105/// etc. all live here.
106///
107/// Object keys are `<name>/<version>` — operator picks `<name>`
108/// once per package family (e.g. `kanade-client`,
109/// `webex-meetings`), then `<version>` per release (e.g.
110/// `0.41.0`, `2025.03`). Slashes are explicitly allowed by NATS
111/// Object Store key rules; the SPA / CLI / HTTP routes all carry
112/// the pair as two path segments.
113///
114/// Why a separate bucket from `agent_releases`:
115/// - `agent_releases` is fleet-critical (the agent's own self-
116/// update path). Keeping it small + audited matters.
117/// - `app_packages` is operator-curated user-space content. The
118/// lifecycle is different (operators add/remove packages
119/// freely; agent releases follow the release.yml pipeline).
120pub const OBJECT_APP_PACKAGES: &str = "app_packages";
121
122/// Object Store holding **manifest script bodies** referenced by
123/// `Execute::script_object` (SPEC §2.4.1's alternative to inline
124/// `script:` / repo-local `script_file:`). Per yukimemi/kanade
125/// issue #210, this is the "Plan B 4-bucket layout" sibling of
126/// `app_packages` — separated because scripts have a different
127/// lifecycle than installer binaries:
128///
129/// - Smaller (typical KB-to-low-MB, vs MB-to-hundreds-of-MB
130/// installers).
131/// - Coupled to manifest versions (script lifecycle = manifest
132/// lifecycle; the `script_current` / `script_status` KV gates
133/// in SPEC §2.6.2 already track manifest versions, so a
134/// matching dedicated bucket keeps the audit story aligned).
135/// - Different access pattern (every Command execute potentially
136/// fetches; vs installer fetched once per fleet deploy).
137///
138/// Object keys follow the same `<name>/<version>` shape as
139/// `app_packages` so the SPA / operator tooling stays uniform.
140/// For manifest-driven scripts `<name>` is the manifest id and
141/// `<version>` is the manifest version, but the bucket itself
142/// imposes no semantics on the pair — operator-uploaded
143/// ad-hoc scripts can use any `<name>/<version>` they like.
144pub const OBJECT_SCRIPTS: &str = "scripts";
145
146/// Object Store holding **overflow stdout / stderr blobs** for the
147/// `ExecResult` wire kind (#227). The default NATS `max_payload` is
148/// 1 MB; a result whose stdout / stderr exceeds it would reject the
149/// publish and pin the agent's outbox in a reconnect loop. The agent
150/// uploads any stdout / stderr larger than `STDOUT_INLINE_THRESHOLD`
151/// (256 KB, picked at 1/4 of the default max_payload so the rest of
152/// the ExecResult fields fit alongside) into this bucket and replaces
153/// the inline field with [`crate::wire::ExecResult::stdout_object`] /
154/// `stderr_object` pointers. Backend's results projector derefs the
155/// pointers before INSERT so downstream consumers (SQLite, SPA
156/// Activity, inventory projector) see the full text the same way
157/// they always have.
158///
159/// Object keys follow the shape `<request_id>/{stdout,stderr}` so
160/// stdout + stderr for the same execution share a sibling prefix —
161/// makes `kanade jetstream` listings group naturally and keeps the
162/// per-key namespace tight against duplicate uploads.
163///
164/// Per-bucket retention (not a stream-wide TTL since async-nats
165/// object_store inherits stream config): matches `STREAM_RESULTS`'s
166/// 30-day retention so an operator who can still query the result
167/// row in SQLite can also fetch the original blob if the inline
168/// copy ever needs re-projection.
169pub const OBJECT_RESULT_OUTPUT: &str = "result_output";
170
171/// Inline threshold for `ExecResult.stdout` / `.stderr`. Larger
172/// payloads overflow into [`OBJECT_RESULT_OUTPUT`]. 256 KB = 1/4 of
173/// the NATS default `max_payload` (1 MB) so the rest of the
174/// ExecResult JSON (request_id, exec_id, etc.) easily fits below the
175/// publish-reject ceiling.
176///
177/// Lives next to the bucket constant rather than on the agent side
178/// so the SPA / future operator tooling can quote the same threshold
179/// when explaining "why this result has no inline stdout".
180pub const STDOUT_INLINE_THRESHOLD: usize = 256 * 1024;
181
182/// Key inside [`BUCKET_AGENT_CONFIG`] carrying the broadcast target
183/// version. Agents watch this key and self-update when their running
184/// version drifts.
185pub const KEY_AGENT_TARGET_VERSION: &str = "target_version";
186
187/// Sprint 6 layered-config keys inside [`BUCKET_AGENT_CONFIG`]:
188/// * `global` — fleet-wide default ConfigScope JSON
189/// * `groups.<name>` — per-group override (partial ConfigScope)
190/// * `pcs.<pc_id>` — per-pc override (partial ConfigScope)
191///
192/// The `groups.` / `pcs.` prefixes let a `kv.keys()` walk pick out
193/// just the rows in one scope when listing.
194pub const KEY_AGENT_CONFIG_GLOBAL: &str = "global";
195pub const PREFIX_AGENT_CONFIG_GROUPS: &str = "groups.";
196pub const PREFIX_AGENT_CONFIG_PCS: &str = "pcs.";
197
198pub fn agent_config_group_key(group: &str) -> String {
199 format!("{PREFIX_AGENT_CONFIG_GROUPS}{group}")
200}
201
202pub fn agent_config_pc_key(pc_id: &str) -> String {
203 format!("{PREFIX_AGENT_CONFIG_PCS}{pc_id}")
204}
205
206/// Inverse of [`agent_config_group_key`] — returns the bare group
207/// name if `key` carries the groups-scope prefix, else `None`.
208pub fn parse_agent_config_group_key(key: &str) -> Option<&str> {
209 key.strip_prefix(PREFIX_AGENT_CONFIG_GROUPS)
210}
211
212/// Inverse of [`agent_config_pc_key`].
213pub fn parse_agent_config_pc_key(key: &str) -> Option<&str> {
214 key.strip_prefix(PREFIX_AGENT_CONFIG_PCS)
215}
216
/// Script status string `"ACTIVE"`.
/// NOTE(review): presumably the value stored in
/// [`BUCKET_SCRIPT_STATUS`] for a usable script — confirm against the
/// readers / writers.
pub const SCRIPT_STATUS_ACTIVE: &str = "ACTIVE";
/// Script status string `"REVOKED"`. The unit tests pin it as distinct
/// from [`SCRIPT_STATUS_ACTIVE`].
pub const SCRIPT_STATUS_REVOKED: &str = "REVOKED";
219
// JetStream stream names. The unit tests require every stream constant
// in this module to be unique.

/// Stream name for inventory data.
/// NOTE(review): subjects and retention are configured elsewhere.
pub const STREAM_INVENTORY: &str = "INVENTORY";
/// Stream name for execution results. The [`OBJECT_RESULT_OUTPUT`]
/// docs cite a 30-day retention for this stream.
pub const STREAM_RESULTS: &str = "RESULTS";
/// Stream name `EXEC`.
/// NOTE(review): presumably carries dispatched commands — confirm.
pub const STREAM_EXEC: &str = "EXEC";
/// Stream name for in-flight script lifecycle events; not to be
/// confused with [`STREAM_OBS_EVENTS`].
pub const STREAM_EVENTS: &str = "EVENTS";
/// Stream name `AUDIT`.
/// NOTE(review): presumably the audit trail — confirm.
pub const STREAM_AUDIT: &str = "AUDIT";
225
226/// JetStream stream backing the per-PC observability event pipeline
227/// (Issue #246). Distinct from [`STREAM_EVENTS`] (in-flight script
228/// lifecycle) — `STREAM_OBS_EVENTS` carries the timeline data the
229/// SPA's Events page consumes: sign-in/out, power on/off, sleep/
230/// resume, agent milestones, diagnostic bundle pointers. The agent
231/// publishes on `obs.<pc_id>` (see [`crate::subject::obs`]) and
232/// this stream catches everything matching [`crate::subject::OBS_FILTER`]
233/// so a backend that boots after the agent doesn't miss any
234/// already-emitted events.
235pub const STREAM_OBS_EVENTS: &str = "OBS_EVENTS";
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Every KV / Object Store bucket name exported by this module.
    const ALL_BUCKETS: &[&str] = &[
        BUCKET_SCRIPT_CURRENT,
        BUCKET_SCRIPT_STATUS,
        BUCKET_AGENTS_STATE,
        BUCKET_AGENT_CONFIG,
        BUCKET_AGENT_GROUPS,
        BUCKET_SCHEDULES,
        BUCKET_JOBS,
        BUCKET_JOBS_YAML,
        BUCKET_SCHEDULES_YAML,
        BUCKET_FLEET_CONFIG,
        BUCKET_SCHEDULER_DISPATCH,
        OBJECT_AGENT_RELEASES,
        OBJECT_APP_PACKAGES,
        OBJECT_SCRIPTS,
        OBJECT_RESULT_OUTPUT,
    ];

    /// Characters NATS accepts in a bucket name: a-z, A-Z, 0-9, `_`, `-`.
    fn is_domain_safe(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '_' || c == '-'
    }

    /// Guards the bucket constants against an edit that sneaks in a `.`
    /// (or any other character outside the domain-safe set), which the
    /// broker would only reject at `create_key_value` time.
    #[test]
    fn bucket_names_are_domain_safe() {
        for &name in ALL_BUCKETS {
            // Checked separately from the charset so a dotted name gets
            // the more specific failure message.
            assert!(
                !name.contains('.'),
                "bucket name {name:?} contains a dot, which NATS KV rejects"
            );
            assert!(
                name.chars().all(is_domain_safe),
                "bucket name {name:?} has non-domain-safe characters"
            );
        }
    }

    /// No two stream constants may share a name.
    #[test]
    fn stream_names_are_unique() {
        let names = [
            STREAM_INVENTORY,
            STREAM_RESULTS,
            STREAM_EXEC,
            STREAM_EVENTS,
            STREAM_AUDIT,
            STREAM_OBS_EVENTS,
        ];
        // Sorting makes duplicates adjacent so `dedup` can drop them.
        let mut unique: Vec<&str> = names.iter().copied().collect();
        unique.sort_unstable();
        unique.dedup();
        assert_eq!(
            unique.len(),
            names.len(),
            "stream constants collide: {names:?}"
        );
    }

    /// Pins the script status strings and keeps them distinct.
    #[test]
    fn script_status_strings() {
        let (active, revoked) = (SCRIPT_STATUS_ACTIVE, SCRIPT_STATUS_REVOKED);
        assert_eq!(active, "ACTIVE");
        assert_eq!(revoked, "REVOKED");
        assert_ne!(active, revoked);
    }

    /// Pins the target-version key string.
    #[test]
    fn key_agent_target_version_constant() {
        let key = KEY_AGENT_TARGET_VERSION;
        assert_eq!(key, "target_version");
    }

    /// Group key builder and parser are inverses.
    #[test]
    fn agent_config_group_key_round_trips() {
        let key = agent_config_group_key("canary");
        assert_eq!(key, "groups.canary");
        assert_eq!(parse_agent_config_group_key(&key), Some("canary"));
    }

    /// Pc key builder and parser are inverses.
    #[test]
    fn agent_config_pc_key_round_trips() {
        let key = agent_config_pc_key("PC-01");
        assert_eq!(key, "pcs.PC-01");
        assert_eq!(parse_agent_config_pc_key(&key), Some("PC-01"));
    }

    /// The `pc.` / `target.` kind prefixes keep per-pc and whole-target
    /// marks in separate namespaces, even for look-alike ids.
    #[test]
    fn dispatch_mark_keys_are_distinct_by_kind() {
        let schedule = "collect-winlog-events";
        let pc_mark = dispatch_mark_pc_key(schedule, "PC-01");
        let target_mark = dispatch_mark_target_key(schedule);
        assert_eq!(pc_mark, "pc.21.collect-winlog-events.5.PC-01");
        assert_eq!(target_mark, "target.21.collect-winlog-events");
        assert_ne!(pc_mark, target_mark);
        // A schedule whose id is literally "<schedule>.<pc>" must not
        // land on the per-pc key built above either.
        assert_ne!(
            dispatch_mark_target_key("collect-winlog-events.PC-01"),
            pc_mark,
        );
    }

    /// The length prefixes make the per-pc encoding injective: a dotted
    /// id cannot lend a segment to its neighbour to forge a colliding
    /// key (#444).
    #[test]
    fn dispatch_mark_pc_key_has_no_dot_collision() {
        // Each row holds two different (schedule_id, pc_id) pairs that
        // must encode to different keys.
        let must_differ = [
            (("a.b", "c"), ("a", "b.c")),
            (("x", "y.z"), ("x.y", "z")),
            // Same components with the roles swapped.
            (("foo", "bar"), ("bar", "foo")),
        ];
        for ((schedule_a, pc_a), (schedule_b, pc_b)) in must_differ {
            assert_ne!(
                dispatch_mark_pc_key(schedule_a, pc_a),
                dispatch_mark_pc_key(schedule_b, pc_b),
            );
        }
    }

    /// A key from one config scope is never parsed as belonging to the
    /// other, and the scope-less `global` key matches neither parser.
    /// Locking this in stops a future prefix rename from silently
    /// merging the scopes.
    #[test]
    fn agent_config_scope_keys_do_not_collide() {
        assert_ne!(PREFIX_AGENT_CONFIG_GROUPS, PREFIX_AGENT_CONFIG_PCS);

        let pcs_scoped = "pcs.someone";
        let groups_scoped = "groups.someone";
        assert!(parse_agent_config_group_key(pcs_scoped).is_none());
        assert!(parse_agent_config_pc_key(groups_scoped).is_none());

        let global = "global";
        assert_eq!(parse_agent_config_group_key(global), None);
        assert_eq!(parse_agent_config_pc_key(global), None);
    }
}