Skip to main content

kanade_shared/
kv.rs

1//! NATS KV bucket name + key helpers (spec §2.3.2).
2//!
3//! NATS KV bucket names must be domain-safe ASCII (a-z, A-Z, 0-9, _, -),
4//! so the spec's dotted names (`script.current`, `script.status`) are
5//! flattened to underscore form here.
6
/// KV bucket for the spec's `script.current` name, flattened to
/// underscore form because bucket names may not contain `.`.
pub const BUCKET_SCRIPT_CURRENT: &str = "script_current";
/// KV bucket for the spec's `script.status` name, flattened to
/// underscore form because bucket names may not contain `.`.
pub const BUCKET_SCRIPT_STATUS: &str = "script_status";
// NOTE(review): presumably holds per-agent state records — nothing in
// this file shows the key or value shape; confirm against the writers.
pub const BUCKET_AGENTS_STATE: &str = "agents_state";
/// KV bucket for agent configuration. Keys defined in this module:
/// [`KEY_AGENT_TARGET_VERSION`], [`KEY_AGENT_CONFIG_GLOBAL`], and the
/// `groups.<name>` / `pcs.<pc_id>` scoped overrides.
pub const BUCKET_AGENT_CONFIG: &str = "agent_config";
// NOTE(review): presumably holds group membership — nothing in this
// file shows the key or value shape; confirm against the writers.
pub const BUCKET_AGENT_GROUPS: &str = "agent_groups";
/// KV bucket for schedules (the JSON store; [`BUCKET_SCHEDULES_YAML`]
/// is its identically-keyed YAML sibling).
pub const BUCKET_SCHEDULES: &str = "schedules";
13
14/// Job catalog (v0.15) — operator-registered Manifests, keyed by
15/// `manifest.id`. Schedules and ad-hoc `kanade run --job-id ...` look
16/// jobs up here; the wire never round-trips an inline Manifest body
17/// through a Schedule again. Editing a job in-place retroactively
18/// changes what future schedule fires deploy.
19pub const BUCKET_JOBS: &str = "jobs";
20
21/// Parallel "operator source-of-truth YAML" stores keyed identically
22/// to `BUCKET_JOBS` / `BUCKET_SCHEDULES`. The agent / scheduler /
23/// projector all keep reading the JSON KVs above — these buckets
24/// exist only so the SPA's YAML editor can round-trip operator
25/// comments + script indentation + block-scalar style exactly.
26///
27/// Population is opportunistic: any `POST` with a
28/// `Content-Type: application/yaml` body stores the raw bytes here
29/// alongside the parsed JSON; JSON-content-type POSTs fall back to a
30/// `serde_yaml` dump so the buckets stay in lockstep with the JSON
31/// store (operator just loses comments on that path).
32pub const BUCKET_JOBS_YAML: &str = "jobs_yaml";
33pub const BUCKET_SCHEDULES_YAML: &str = "schedules_yaml";
34
35/// Fleet-wide singleton settings that aren't per-agent (so they don't
36/// belong in `agent_config`'s layered scopes) and aren't per-schedule
37/// (so they don't belong in `schedules`). First and only key so far is
/// [`KEY_FREEZE`] (#418 Phase 5 global change-freeze). It is one small
/// bucket that both the backend scheduler and every agent's local
/// scheduler watch.
40pub const BUCKET_FLEET_CONFIG: &str = "fleet_config";
41
42/// Singleton key in [`BUCKET_FLEET_CONFIG`] holding the JSON-encoded
43/// [`crate::manifest::Freeze`]. **Key absent ⇒ not frozen** (clearing
44/// the freeze is a KV delete), so readers treat a missing key as "fire
45/// normally" and only evaluate `Freeze::is_active` when the key exists.
46pub const KEY_FREEZE: &str = "freeze";
47
48/// KV bucket holding **per-(schedule, pc) last-dispatch marks** for the
49/// backend scheduler's in-flight suppression.
50///
51/// The per-pc / per-target dedup ([`crate::manifest::ExecMode`]) only
52/// sees *completed* runs (`execution_results`, exit_code = 0). Since
53/// #418 the reconcile poll runs every minute ([`crate::manifest::POLL_CRON`]),
54/// but a dispatched Command doesn't land a completion until
55/// `jitter (agent-side) + run + outbox drain` later — frequently
56/// several minutes with a 3–5 min jitter. Without a dispatch record the
57/// poll re-fires the same PC (or whole target) every tick across that
58/// gap. This bucket records "I dispatched (schedule, pc) at T" so the
59/// scheduler can suppress re-fire for a bounded window without waiting
60/// on the completion round-trip.
61///
62/// Values are the dispatch instant as an RFC3339 string. A bucket-wide
63/// `max_age` GCs marks once they're well past any suppression window,
64/// so the bucket can't grow unbounded; the suppression-window check
65/// itself lives in `scheduler::policy::suppress_dispatched`.
66pub const BUCKET_SCHEDULER_DISPATCH: &str = "scheduler_dispatch";
67
/// Builds the dispatch-mark key for one `(schedule_id, pc_id)` pair
/// (the OncePerPc case).
///
/// Layout: `pc.<len(schedule_id)>.<schedule_id>.<len(pc_id)>.<pc_id>`,
/// where each length is the id's byte length (`str::len`).
///
/// * The leading `pc.` tag keeps these keys out of the `target.`
///   namespace.
/// * Writing every id's length in front of it makes the encoding
///   self-delimiting, so two different pairs can never map to the same
///   key even when an id contains the `.` separator:
///   `("a.b", "c")` → `pc.3.a.b.1.c`, `("a", "b.c")` → `pc.1.a.3.b.c`.
///
/// A length prefix was chosen over an escaping scheme because NATS KV
/// keys only allow `[-/_=.a-zA-Z0-9]`; the prefix adds nothing but
/// digits and dots, so it never leaves that charset. The ids themselves
/// are copied through verbatim.
pub fn dispatch_mark_pc_key(schedule_id: &str, pc_id: &str) -> String {
    let schedule_len = schedule_id.len();
    let pc_len = pc_id.len();
    format!("pc.{schedule_len}.{schedule_id}.{pc_len}.{pc_id}")
}
87
/// Builds the whole-target dispatch-mark key (the OncePerTarget case).
///
/// Layout: `target.<len(schedule_id)>.<schedule_id>`, with the length
/// being the id's byte length. A per-target fire dispatches the entire
/// target in one go, so there is exactly one key per schedule and no
/// per-pc component. The length prefix mirrors the shape used by
/// [`dispatch_mark_pc_key`].
pub fn dispatch_mark_target_key(schedule_id: &str) -> String {
    let id_len = schedule_id.len();
    format!("target.{id_len}.{schedule_id}")
}
95
96/// Object Store bucket holding raw agent binaries (one object per
97/// version, e.g. `0.2.0` → file bytes).
98pub const OBJECT_AGENT_RELEASES: &str = "agent_releases";
99
100/// Object Store holding **generic application packages** — anything
101/// the agent / kitting scripts pull down + install on endpoints.
102/// First consumer is the kanade-client app, but the bucket is
103/// intentionally generic: third-party installers (Webex, Teams,
104/// custom MSI bundles), upgrade scripts, configuration archives,
105/// etc. all live here.
106///
107/// Object keys are `<name>/<version>` — operator picks `<name>`
108/// once per package family (e.g. `kanade-client`,
109/// `webex-meetings`), then `<version>` per release (e.g.
110/// `0.41.0`, `2025.03`). Slashes are explicitly allowed by NATS
111/// Object Store key rules; the SPA / CLI / HTTP routes all carry
112/// the pair as two path segments.
113///
114/// Why a separate bucket from `agent_releases`:
115/// - `agent_releases` is fleet-critical (the agent's own self-
116///   update path). Keeping it small + audited matters.
117/// - `app_packages` is operator-curated user-space content. The
118///   lifecycle is different (operators add/remove packages
119///   freely; agent releases follow the release.yml pipeline).
120pub const OBJECT_APP_PACKAGES: &str = "app_packages";
121
122/// Object Store holding **manifest script bodies** referenced by
123/// `Execute::script_object` (SPEC §2.4.1's alternative to inline
124/// `script:` / repo-local `script_file:`). Per yukimemi/kanade
125/// issue #210, this is the "Plan B 4-bucket layout" sibling of
126/// `app_packages` — separated because scripts have a different
127/// lifecycle than installer binaries:
128///
129/// - Smaller (typical KB-to-low-MB, vs MB-to-hundreds-of-MB
130///   installers).
131/// - Coupled to manifest versions (script lifecycle = manifest
132///   lifecycle; the `script_current` / `script_status` KV gates
133///   in SPEC §2.6.2 already track manifest versions, so a
134///   matching dedicated bucket keeps the audit story aligned).
135/// - Different access pattern (every Command execute potentially
136///   fetches; vs installer fetched once per fleet deploy).
137///
138/// Object keys follow the same `<name>/<version>` shape as
139/// `app_packages` so the SPA / operator tooling stays uniform.
140/// For manifest-driven scripts `<name>` is the manifest id and
141/// `<version>` is the manifest version, but the bucket itself
142/// imposes no semantics on the pair — operator-uploaded
143/// ad-hoc scripts can use any `<name>/<version>` they like.
144pub const OBJECT_SCRIPTS: &str = "scripts";
145
146/// Object Store holding **overflow stdout / stderr blobs** for the
147/// `ExecResult` wire kind (#227). The default NATS `max_payload` is
/// 1 MB; publishing a result whose stdout / stderr exceeds it would be
/// rejected and pin the agent's outbox in a reconnect loop. The agent
150/// uploads any stdout / stderr larger than `STDOUT_INLINE_THRESHOLD`
151/// (256 KB, picked at 1/4 of the default max_payload so the rest of
152/// the ExecResult fields fit alongside) into this bucket and replaces
153/// the inline field with [`crate::wire::ExecResult::stdout_object`] /
154/// `stderr_object` pointers. Backend's results projector derefs the
155/// pointers before INSERT so downstream consumers (SQLite, SPA
156/// Activity, inventory projector) see the full text the same way
157/// they always have.
158///
159/// Object keys follow the shape `<request_id>/{stdout,stderr}` so
160/// stdout + stderr for the same execution share a sibling prefix —
161/// makes `kanade jetstream` listings group naturally and keeps the
162/// per-key namespace tight against duplicate uploads.
163///
164/// Per-bucket retention (not a stream-wide TTL since async-nats
165/// object_store inherits stream config): matches `STREAM_RESULTS`'s
166/// 30-day retention so an operator who can still query the result
167/// row in SQLite can also fetch the original blob if the inline
168/// copy ever needs re-projection.
169pub const OBJECT_RESULT_OUTPUT: &str = "result_output";
170
171/// Inline threshold for `ExecResult.stdout` / `.stderr`. Larger
172/// payloads overflow into [`OBJECT_RESULT_OUTPUT`]. 256 KB = 1/4 of
173/// the NATS default `max_payload` (1 MB) so the rest of the
174/// ExecResult JSON (request_id, exec_id, etc.) easily fits below the
175/// publish-reject ceiling.
176///
177/// Lives next to the bucket constant rather than on the agent side
178/// so the SPA / future operator tooling can quote the same threshold
179/// when explaining "why this result has no inline stdout".
180pub const STDOUT_INLINE_THRESHOLD: usize = 256 * 1024;
181
182/// Key inside [`BUCKET_AGENT_CONFIG`] carrying the broadcast target
183/// version. Agents watch this key and self-update when their running
184/// version drifts.
185pub const KEY_AGENT_TARGET_VERSION: &str = "target_version";
186
/// Sprint 6 layered-config keys inside [`BUCKET_AGENT_CONFIG`]:
///   * `global`        — fleet-wide default ConfigScope JSON
///   * `groups.<name>` — per-group override (partial ConfigScope)
///   * `pcs.<pc_id>`   — per-pc override (partial ConfigScope)
///
/// The `groups.` / `pcs.` prefixes let a `kv.keys()` walk pick out
/// just the rows in one scope when listing.
pub const KEY_AGENT_CONFIG_GLOBAL: &str = "global";
/// Key prefix of the per-group scope; a full key is this prefix
/// followed by the group name (see [`agent_config_group_key`]).
pub const PREFIX_AGENT_CONFIG_GROUPS: &str = "groups.";
/// Key prefix of the per-pc scope; a full key is this prefix followed
/// by the pc id (see [`agent_config_pc_key`]).
pub const PREFIX_AGENT_CONFIG_PCS: &str = "pcs.";
197
198pub fn agent_config_group_key(group: &str) -> String {
199    format!("{PREFIX_AGENT_CONFIG_GROUPS}{group}")
200}
201
202pub fn agent_config_pc_key(pc_id: &str) -> String {
203    format!("{PREFIX_AGENT_CONFIG_PCS}{pc_id}")
204}
205
206/// Inverse of [`agent_config_group_key`] — returns the bare group
207/// name if `key` carries the groups-scope prefix, else `None`.
208pub fn parse_agent_config_group_key(key: &str) -> Option<&str> {
209    key.strip_prefix(PREFIX_AGENT_CONFIG_GROUPS)
210}
211
212/// Inverse of [`agent_config_pc_key`].
213pub fn parse_agent_config_pc_key(key: &str) -> Option<&str> {
214    key.strip_prefix(PREFIX_AGENT_CONFIG_PCS)
215}
216
/// Script status string for the active state.
// NOTE(review): presumably the value stored under `BUCKET_SCRIPT_STATUS`
// — nothing in this file shows where it is written; confirm.
pub const SCRIPT_STATUS_ACTIVE: &str = "ACTIVE";
/// Script status string for the revoked state.
pub const SCRIPT_STATUS_REVOKED: &str = "REVOKED";
219
// JetStream stream names. The values must stay pairwise distinct; the
// `stream_names_are_unique` test enforces that.

/// Stream name `INVENTORY`.
pub const STREAM_INVENTORY: &str = "INVENTORY";
/// Stream name `RESULTS`. The `result_output` Object Store bucket is
/// documented as matching this stream's 30-day retention.
pub const STREAM_RESULTS: &str = "RESULTS";
/// Stream name `EXEC`.
pub const STREAM_EXEC: &str = "EXEC";
/// Stream name `EVENTS` — in-flight script lifecycle events, as opposed
/// to the per-PC observability timeline in [`STREAM_OBS_EVENTS`].
pub const STREAM_EVENTS: &str = "EVENTS";
/// Stream name `AUDIT`.
pub const STREAM_AUDIT: &str = "AUDIT";
225
226/// JetStream stream backing the per-PC observability event pipeline
227/// (Issue #246). Distinct from [`STREAM_EVENTS`] (in-flight script
228/// lifecycle) — `STREAM_OBS_EVENTS` carries the timeline data the
229/// SPA's Events page consumes: sign-in/out, power on/off, sleep/
230/// resume, agent milestones, diagnostic bundle pointers. The agent
231/// publishes on `obs.<pc_id>` (see [`crate::subject::obs`]) and
232/// this stream catches everything matching [`crate::subject::OBS_FILTER`]
233/// so a backend that boots after the agent doesn't miss any
234/// already-emitted events.
235pub const STREAM_OBS_EVENTS: &str = "OBS_EVENTS";
236
#[cfg(test)]
mod tests {
    use super::*;

    /// True for the characters a bucket name may contain:
    /// ASCII letters, ASCII digits, `_` and `-`.
    fn is_domain_safe(c: char) -> bool {
        c.is_ascii_alphanumeric() || matches!(c, '_' | '-')
    }

    /// Bucket names are restricted to domain-safe ASCII
    /// (a-z, A-Z, 0-9, _, -). Sweeping every constant here means an edit
    /// that sneaks in a `.` fails this test instead of failing later at
    /// `create_key_value` time on the broker.
    #[test]
    fn bucket_names_are_domain_safe() {
        let buckets = [
            BUCKET_SCRIPT_CURRENT,
            BUCKET_SCRIPT_STATUS,
            BUCKET_AGENTS_STATE,
            BUCKET_AGENT_CONFIG,
            BUCKET_AGENT_GROUPS,
            BUCKET_SCHEDULES,
            BUCKET_JOBS,
            BUCKET_JOBS_YAML,
            BUCKET_SCHEDULES_YAML,
            BUCKET_FLEET_CONFIG,
            BUCKET_SCHEDULER_DISPATCH,
            OBJECT_AGENT_RELEASES,
            OBJECT_APP_PACKAGES,
            OBJECT_SCRIPTS,
            OBJECT_RESULT_OUTPUT,
        ];
        for name in buckets {
            // The dot check runs first so the most likely mistake gets
            // the more specific failure message.
            assert!(
                !name.contains('.'),
                "bucket name {name:?} contains a dot, which NATS KV rejects"
            );
            assert!(
                name.chars().all(is_domain_safe),
                "bucket name {name:?} has non-domain-safe characters"
            );
        }
    }

    /// No two stream constants may share a value.
    #[test]
    fn stream_names_are_unique() {
        let names = [
            STREAM_INVENTORY,
            STREAM_RESULTS,
            STREAM_EXEC,
            STREAM_EVENTS,
            STREAM_AUDIT,
            STREAM_OBS_EVENTS,
        ];
        // A set drops duplicates, so a shorter set means a collision.
        let distinct: std::collections::BTreeSet<&str> = names.iter().copied().collect();
        assert_eq!(
            distinct.len(),
            names.len(),
            "stream constants collide: {names:?}"
        );
    }

    /// Pins the literal status strings and keeps them different.
    #[test]
    fn script_status_strings() {
        assert_ne!(SCRIPT_STATUS_ACTIVE, SCRIPT_STATUS_REVOKED);
        assert_eq!(SCRIPT_STATUS_ACTIVE, "ACTIVE");
        assert_eq!(SCRIPT_STATUS_REVOKED, "REVOKED");
    }

    /// Pins the literal key agents watch for the target version.
    #[test]
    fn key_agent_target_version_constant() {
        assert_eq!(KEY_AGENT_TARGET_VERSION, "target_version");
    }

    /// Build → parse returns the original group name.
    #[test]
    fn agent_config_group_key_round_trips() {
        let key = agent_config_group_key("canary");
        assert_eq!(key, "groups.canary");
        let parsed = parse_agent_config_group_key(&key);
        assert_eq!(parsed, Some("canary"));
    }

    /// Build → parse returns the original pc id.
    #[test]
    fn agent_config_pc_key_round_trips() {
        let key = agent_config_pc_key("PC-01");
        assert_eq!(key, "pcs.PC-01");
        let parsed = parse_agent_config_pc_key(&key);
        assert_eq!(parsed, Some("PC-01"));
    }

    /// The `pc.` / `target.` kind prefixes keep the per-pc and
    /// whole-target namespaces apart, even when the ids look alike.
    #[test]
    fn dispatch_mark_keys_are_distinct_by_kind() {
        let schedule = "collect-winlog-events";
        let pc_key = dispatch_mark_pc_key(schedule, "PC-01");
        let target_key = dispatch_mark_target_key(schedule);

        assert_eq!(pc_key, "pc.21.collect-winlog-events.5.PC-01");
        assert_eq!(target_key, "target.21.collect-winlog-events");
        assert_ne!(pc_key, target_key);

        // A schedule whose id is literally "<schedule>.<pc>" must not
        // land on the per-pc key either.
        let dotted_target = dispatch_mark_target_key("collect-winlog-events.PC-01");
        assert_ne!(dotted_target, pc_key);
    }

    /// Length-prefixing makes the per-pc encoding injective: a dotted
    /// schedule_id cannot borrow a leading segment from the pc_id (or
    /// the other way round) to produce a colliding key.
    /// (CodeRabbit / claude #444.)
    #[test]
    fn dispatch_mark_pc_key_has_no_dot_collision() {
        let must_differ = [
            // The dot sits on opposite sides of the id boundary.
            (("a.b", "c"), ("a", "b.c")),
            (("x", "y.z"), ("x.y", "z")),
            // Same components, swapped roles.
            (("foo", "bar"), ("bar", "foo")),
        ];
        for ((left_schedule, left_pc), (right_schedule, right_pc)) in must_differ {
            assert_ne!(
                dispatch_mark_pc_key(left_schedule, left_pc),
                dispatch_mark_pc_key(right_schedule, right_pc),
            );
        }
    }

    /// The two scope prefixes must stay distinct so a key from one scope
    /// is never parsed as belonging to the other, and the bare `global`
    /// key belongs to neither. Pinning this here stops a future rename
    /// of either prefix from silently breaking the parsers.
    #[test]
    fn agent_config_scope_keys_do_not_collide() {
        assert_ne!(PREFIX_AGENT_CONFIG_GROUPS, PREFIX_AGENT_CONFIG_PCS);

        // Keys from the other scope are rejected.
        assert_eq!(parse_agent_config_group_key("pcs.someone"), None);
        assert_eq!(parse_agent_config_pc_key("groups.someone"), None);

        // The unprefixed global key matches neither parser.
        assert!(parse_agent_config_group_key("global").is_none());
        assert!(parse_agent_config_pc_key("global").is_none());
    }
}
373}