//! `kanade_shared/manifest.rs` — job manifest, fanout plan, and
//! schedule types.

1use serde::{Deserialize, Serialize};
2
3use crate::wire::{RunAs, Shell};
4
5/// YAML job manifest (= registered "what to run", v0.18.0+).
6///
7/// Owns only script-intrinsic fields. **Who** (`target`), **how to
8/// phase fanout** (`rollout`), and **when to stagger start**
9/// (`jitter`) all moved to the Schedule / exec request side — same
10/// script can now be fired against different targets / rollouts
11/// without copying the script body.
12///
13/// `deny_unknown_fields` makes operators copy-pasting an older yaml
14/// that still has `target:` / `rollout:` see a clear parse error at
15/// `kanade job create` time instead of mysteriously losing it.
16#[derive(Serialize, Deserialize, Debug, Clone)]
17#[serde(deny_unknown_fields)]
18pub struct Manifest {
19    pub id: String,
20    pub version: String,
21    #[serde(default)]
22    pub description: Option<String>,
23    pub execute: Execute,
24    #[serde(default)]
25    pub require_approval: bool,
26    /// Opt-in marker that this job produces a JSON inventory fact
27    /// payload on stdout. When present, the backend's results
28    /// projector parses `ExecResult.stdout` as JSON and upserts an
29    /// `inventory_facts` row keyed by `(pc_id, manifest.id)`. The
30    /// `display` sub-config drives the SPA's Inventory page render.
31    #[serde(default)]
32    pub inventory: Option<InventoryHint>,
33}
34
35/// "Who + how + when-to-stagger" — the fanout-plan side of an exec.
36/// Used both as the POST `/api/exec/{job_id}` body and as the embedded
37/// `target` / `rollout` / `jitter` slot on [`Schedule`]. Centralising
38/// here keeps the validation + serialisation logic in one place.
39#[derive(Serialize, Deserialize, Debug, Clone, Default)]
40pub struct FanoutPlan {
41    #[serde(default)]
42    pub target: Target,
43    /// Optional wave rollout — when present, the backend publishes
44    /// each wave's group subject on its own delay schedule instead
45    /// of fanning out the `target` block in one go. `target` then
46    /// only labels the deploy for the audit log.
47    #[serde(default, skip_serializing_if = "Option::is_none")]
48    pub rollout: Option<Rollout>,
49    /// Optional humantime jitter; agent uses it to randomise
50    /// execution start. Lives here (not on the script) so different
51    /// schedules / ad-hoc fires of the same job can pick different
52    /// stagger windows.
53    #[serde(default, skip_serializing_if = "Option::is_none")]
54    pub jitter: Option<String>,
55    /// Absolute time the scheduler stamps on each emitted Command
56    /// when this exec was driven by a [`Schedule`] with
57    /// `starting_deadline`. Agents receiving a Command after this
58    /// instant publish a synthetic skipped-result instead of
59    /// running the script. `None` (default) = no deadline / catch
60    /// up whenever delivered. Operators don't usually set this
61    /// directly — the scheduler computes it from `tick_at +
62    /// starting_deadline`.
63    #[serde(default, skip_serializing_if = "Option::is_none")]
64    pub deadline_at: Option<chrono::DateTime<chrono::Utc>>,
65}
66
67/// Manifest sub-section: how the SPA should render the inventory
68/// facts this job produces. Each field name (`field`) is a top-level
69/// key in the stdout JSON, e.g. `hostname`, `ram_gb`.
70///
71/// Two render modes:
72///   * `display` — vertical "field / value" per PC, used by the
73///     `/inventory?pc=<id>` detail view. ALL columns the operator
74///     wants visible on the detail page.
75///   * `summary` — horizontal table across the fleet (row = PC,
76///     column = field) on `/inventory`. Optional; when omitted the
77///     SPA falls back to `display`, but operators usually want a
78///     trimmer "hostname / OS / CPU / RAM" set for the fleet view.
79#[derive(Serialize, Deserialize, Debug, Clone)]
80pub struct InventoryHint {
81    /// Detail-view columns, in order.
82    pub display: Vec<DisplayField>,
83    /// Optional fleet-list columns (row = PC). Defaults to `display`
84    /// when omitted, but operators usually pick a 3-5 column subset.
85    #[serde(default, skip_serializing_if = "Option::is_none")]
86    pub summary: Option<Vec<DisplayField>>,
87}
88
89#[derive(Serialize, Deserialize, Debug, Clone)]
90pub struct DisplayField {
91    /// Top-level key in the stdout JSON.
92    pub field: String,
93    /// Human-readable column header.
94    pub label: String,
95    /// Optional render hint — `"number"`, `"bytes"`, `"timestamp"`.
96    /// Defaults to plain text rendering on the SPA side.
97    #[serde(default, skip_serializing_if = "Option::is_none")]
98    #[serde(rename = "type")]
99    pub kind: Option<String>,
100}
101
102#[derive(Serialize, Deserialize, Debug, Clone)]
103pub struct Rollout {
104    #[serde(default)]
105    pub strategy: RolloutStrategy,
106    pub waves: Vec<Wave>,
107}
108
109#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)]
110#[serde(rename_all = "lowercase")]
111pub enum RolloutStrategy {
112    #[default]
113    Wave,
114}
115
116#[derive(Serialize, Deserialize, Debug, Clone)]
117pub struct Wave {
118    pub group: String,
119    /// humantime delay measured from the deploy's publish time. wave[0]
120    /// typically has "0s"; subsequent waves use minutes / hours.
121    pub delay: String,
122}
123
124#[derive(Serialize, Deserialize, Debug, Clone, Default)]
125pub struct Target {
126    #[serde(default)]
127    pub groups: Vec<String>,
128    #[serde(default)]
129    pub pcs: Vec<String>,
130    #[serde(default)]
131    pub all: bool,
132}
133
134impl Target {
135    /// At least one of all / groups / pcs is set.
136    pub fn is_specified(&self) -> bool {
137        self.all || !self.groups.is_empty() || !self.pcs.is_empty()
138    }
139}
140
141#[derive(Serialize, Deserialize, Debug, Clone)]
142pub struct Execute {
143    pub shell: ExecuteShell,
144    pub script: String,
145    /// humantime duration string (e.g. "30s", "10m"). Script-intrinsic
146    /// — represents how long this script reasonably takes to run.
147    pub timeout: String,
148    /// Token + session combination the agent uses to launch the
149    /// script (v0.21). Default = [`RunAs::System`] (Session 0,
150    /// LocalSystem privileges, no GUI) — matches pre-v0.21 behavior.
151    #[serde(default)]
152    pub run_as: RunAs,
153    /// Working directory for the spawned child (v0.21.1). When
154    /// unset, the child inherits the agent's cwd — on Windows that
155    /// means `%SystemRoot%\System32` for the prod service, which is
156    /// almost never what operators actually want. Use an absolute
157    /// path; relative paths are passed through to the OS verbatim.
158    /// `%PROGRAMDATA%` works for `run_as: system`; for `run_as: user`
159    /// you'd want `%USERPROFILE%` (but expansion happens in the
160    /// shell, so write `$env:USERPROFILE` for PowerShell, or set
161    /// it via teravars before `kanade job create`).
162    #[serde(default, skip_serializing_if = "Option::is_none")]
163    pub cwd: Option<String>,
164}
165
166#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
167#[serde(rename_all = "lowercase")]
168pub enum ExecuteShell {
169    Powershell,
170    Cmd,
171}
172
173impl From<ExecuteShell> for Shell {
174    fn from(s: ExecuteShell) -> Self {
175        match s {
176            ExecuteShell::Powershell => Shell::Powershell,
177            ExecuteShell::Cmd => Shell::Cmd,
178        }
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest schedule yaml that parses; used by the tests that
    /// only check what an omitted key defaults to.
    const BARE_SCHEDULE_YAML: &str = r#"
id: x
cron: "* * * * * *"
job_id: y
target: { all: true }
"#;

    /// Parses a schedule yaml, panicking with "parse" on failure.
    fn parse_schedule(yaml: &str) -> Schedule {
        serde_yaml::from_str(yaml).expect("parse")
    }

    #[test]
    fn target_is_specified_requires_at_least_one_field() {
        let empty = Target::default();
        assert!(!empty.is_specified());

        let with_all = Target {
            all: true,
            ..Target::default()
        };
        assert!(with_all.is_specified());

        let with_groups = Target {
            groups: vec!["canary".into()],
            ..Target::default()
        };
        assert!(with_groups.is_specified());

        let with_pcs = Target {
            pcs: vec!["minipc".into()],
            ..Target::default()
        };
        assert!(with_pcs.is_specified());
    }

    #[test]
    fn manifest_deserialises_minimal_yaml() {
        // Matches jobs/echo-test.yaml. v0.18: no target/rollout/jitter
        // — those live on the schedule / exec request now.
        let yaml = r#"
id: echo-test
version: 0.0.1
execute:
  shell: powershell
  script: "echo 'kanade'"
  timeout: 30s
"#;
        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(m.id, "echo-test");
        assert_eq!(m.version, "0.0.1");
        assert_eq!(m.execute.shell, ExecuteShell::Powershell);
        assert_eq!(m.execute.script.trim(), "echo 'kanade'");
        assert_eq!(m.execute.timeout, "30s");
        assert!(!m.require_approval);
    }

    #[test]
    fn schedule_carries_target_and_rollout() {
        let yaml = r#"
id: hourly-cleanup-canary
cron: "0 0 * * * *"
job_id: cleanup
enabled: true
target:
  groups: [canary, wave1]
jitter: 30s
rollout:
  strategy: wave
  waves:
    - { group: canary, delay: 0s }
    - { group: wave1,  delay: 5s }
"#;
        let s = parse_schedule(yaml);
        assert_eq!(s.id, "hourly-cleanup-canary");
        assert_eq!(s.job_id, "cleanup");
        assert_eq!(s.plan.target.groups, vec!["canary", "wave1"]);
        assert_eq!(s.plan.jitter.as_deref(), Some("30s"));
        let rollout = s.plan.rollout.expect("rollout present");
        assert_eq!(rollout.waves.len(), 2);
        assert_eq!(rollout.waves[0].group, "canary");
        assert_eq!(rollout.waves[1].delay, "5s");
        assert_eq!(rollout.strategy, RolloutStrategy::Wave);
    }

    #[test]
    fn schedule_minimal_target_all() {
        let yaml = r#"
id: every-10s
cron: "*/10 * * * * *"
enabled: true
job_id: scheduled-echo
target: { all: true }
"#;
        let s = parse_schedule(yaml);
        assert_eq!(s.id, "every-10s");
        assert_eq!(s.cron, "*/10 * * * * *");
        assert!(s.enabled);
        assert_eq!(s.job_id, "scheduled-echo");
        assert!(s.plan.target.all);
        assert!(s.plan.rollout.is_none());
        assert!(s.plan.jitter.is_none());
    }

    #[test]
    fn schedule_enabled_defaults_to_true() {
        let s = parse_schedule(BARE_SCHEDULE_YAML);
        assert!(s.enabled);
    }

    #[test]
    fn schedule_mode_defaults_to_every_tick() {
        let s = parse_schedule(BARE_SCHEDULE_YAML);
        assert_eq!(s.mode, ExecMode::EveryTick);
        assert!(s.cooldown.is_none());
        assert!(!s.auto_disable_when_done);
    }

    #[test]
    fn schedule_mode_serialises_snake_case() {
        for (mode, expected) in [
            (ExecMode::EveryTick, "every_tick"),
            (ExecMode::OncePerPc, "once_per_pc"),
            (ExecMode::OncePerTarget, "once_per_target"),
        ] {
            let s = serde_json::to_value(mode).expect("serialise");
            assert_eq!(s, serde_json::Value::String(expected.into()));
            let back: ExecMode = serde_json::from_value(serde_json::Value::String(expected.into()))
                .expect("deserialise");
            assert_eq!(back, mode, "round-trip for {expected}");
        }
    }

    #[test]
    fn schedule_kitting_yaml_parses() {
        let yaml = r#"
id: kitting-setup
cron: "*/30 * * * * *"
job_id: install-baseline
target: { all: true }
mode: once_per_pc
"#;
        let s = parse_schedule(yaml);
        assert_eq!(s.mode, ExecMode::OncePerPc);
        assert!(s.cooldown.is_none());
        assert!(!s.auto_disable_when_done);
    }

    #[test]
    fn schedule_batch_campaign_yaml_parses() {
        let yaml = r#"
id: q3-patch-batch
cron: "*/5 * * * * *"
job_id: install-patch
target:
  pcs: [pc-001, pc-002, pc-003]
mode: once_per_pc
auto_disable_when_done: true
"#;
        let s = parse_schedule(yaml);
        assert_eq!(s.mode, ExecMode::OncePerPc);
        assert!(s.cooldown.is_none());
        assert!(s.auto_disable_when_done);
        assert_eq!(s.plan.target.pcs.len(), 3);
    }

    #[test]
    fn schedule_throttled_yaml_parses() {
        let yaml = r#"
id: daily-compliance
cron: "*/5 * * * * *"
job_id: check-av-status
target: { all: true }
mode: once_per_pc
cooldown: 1d
"#;
        let s = parse_schedule(yaml);
        assert_eq!(s.mode, ExecMode::OncePerPc);
        assert_eq!(s.cooldown.as_deref(), Some("1d"));
    }

    #[test]
    fn schedule_runs_on_defaults_to_backend() {
        let s = parse_schedule(BARE_SCHEDULE_YAML);
        assert_eq!(s.runs_on, RunsOn::Backend);
    }

    #[test]
    fn schedule_runs_on_agent_parses() {
        let yaml = r#"
id: offline-inv
cron: "0 0 * * * *"
job_id: inventory-hw
target: { all: true }
runs_on: agent
mode: once_per_pc
"#;
        let s = parse_schedule(yaml);
        assert_eq!(s.runs_on, RunsOn::Agent);
        assert_eq!(s.mode, ExecMode::OncePerPc);
    }

    #[test]
    fn runs_on_serialises_snake_case() {
        for (mode, expected) in [(RunsOn::Backend, "backend"), (RunsOn::Agent, "agent")] {
            let s = serde_json::to_value(mode).expect("serialise");
            assert_eq!(s, serde_json::Value::String(expected.into()));
            let back: RunsOn = serde_json::from_value(serde_json::Value::String(expected.into()))
                .expect("deserialise");
            assert_eq!(back, mode);
        }
    }

    #[test]
    fn schedule_once_per_target_yaml_parses() {
        let yaml = r#"
id: license-checkin
cron: "*/10 * * * * *"
job_id: hit-license-server
target: { all: true }
mode: once_per_target
cooldown: 24h
"#;
        let s = parse_schedule(yaml);
        assert_eq!(s.mode, ExecMode::OncePerTarget);
        assert_eq!(s.cooldown.as_deref(), Some("24h"));
    }

    #[test]
    fn execute_shell_into_wire_shell() {
        assert_eq!(Shell::from(ExecuteShell::Powershell), Shell::Powershell);
        assert_eq!(Shell::from(ExecuteShell::Cmd), Shell::Cmd);
    }

    #[test]
    fn missing_required_field_errors() {
        // `id` missing. Every key below is a known Manifest field, so
        // the absent `id` is the only thing that can fail the parse.
        // A stray unknown key (such as the legacy `target:`) would be
        // rejected by `deny_unknown_fields` on its own and hide
        // whether `id` is actually required.
        let yaml = r#"
version: 1.0.0
execute:
  shell: powershell
  script: "echo"
  timeout: 1s
"#;
        let err = serde_yaml::from_str::<Manifest>(yaml).expect_err("`id` is required");
        let msg = err.to_string();
        assert!(msg.contains("missing field `id`"), "unexpected error: {msg}");
    }

    #[test]
    fn legacy_fanout_keys_are_rejected() {
        // An otherwise complete manifest that still carries the
        // pre-v0.18 `target:` key must fail loudly rather than have
        // the key silently dropped.
        let yaml = r#"
id: legacy
version: 1.0.0
target: { all: true }
execute:
  shell: powershell
  script: "echo"
  timeout: 1s
"#;
        let err = serde_yaml::from_str::<Manifest>(yaml).expect_err("`target` is not a Manifest key");
        let msg = err.to_string();
        assert!(msg.contains("unknown field `target`"), "unexpected error: {msg}");
    }
}
441
442/// Periodic schedule (spec §2.4.3). v0.18.0 carries the fanout plan
443/// (target + optional rollout + optional jitter) inline; the
444/// referenced job (`job_id` → [`BUCKET_JOBS`]) supplies only the
445/// script body. Two schedules of the same job can target different
446/// groups on different cadences without copying the manifest.
447#[derive(Serialize, Deserialize, Debug, Clone)]
448pub struct Schedule {
449    pub id: String,
450    /// 6-field cron expression (`sec min hour day month day-of-week`),
451    /// matching `tokio-cron-scheduler` syntax.
452    pub cron: String,
453    /// Key into [`crate::kv::BUCKET_JOBS`]. Must equal a registered
454    /// Manifest's `id`.
455    pub job_id: String,
456    /// Who + how-to-phase + when-to-stagger. The Manifest doesn't
457    /// carry these any more — same job + different fanout = different
458    /// schedule.
459    #[serde(flatten)]
460    pub plan: FanoutPlan,
461    /// Per-pc/per-target dedup semantics (v0.19). Default
462    /// `EveryTick` keeps the historical "fire every cron tick at the
463    /// whole target" behavior.
464    #[serde(default)]
465    pub mode: ExecMode,
466    /// Humantime cooldown for `OncePerPc` / `OncePerTarget`. Once a
467    /// pc/target has succeeded, the scheduler waits this long before
468    /// considering it eligible again. Omit for "succeed once, then
469    /// permanently skip" — i.e. cooldown = infinity.
470    #[serde(default, skip_serializing_if = "Option::is_none")]
471    pub cooldown: Option<String>,
472    /// When true AND the schedule's lifecycle is permanently
473    /// terminated (`cooldown = None` + dedup says nothing more to
474    /// do), the scheduler flips `enabled = false` and emits an
475    /// audit event. No-op when `cooldown` is set (re-arming
476    /// schedules never finish).
477    #[serde(default)]
478    pub auto_disable_when_done: bool,
479    /// v0.22: optional humantime window after a cron tick during
480    /// which the Command is still considered "live". The scheduler
481    /// computes `tick_at + starting_deadline` and stamps it onto
482    /// each Command as `deadline_at`; agents skip Commands they
483    /// receive after that absolute time. `None` (default) = no
484    /// deadline, meaning a Command queued in the broker / stream
485    /// during agent downtime runs whenever the agent reconnects —
486    /// good for kitting / inventory / cleanup. Set this for
487    /// time-of-day notifications, lunch reminders, etc., where
488    /// "fire 3 hours late" would be wrong.
489    #[serde(default, skip_serializing_if = "Option::is_none")]
490    pub starting_deadline: Option<String>,
491    /// v0.23: where does the cron tick happen? `Backend` (default,
492    /// historical) = backend's scheduler fires Commands via NATS;
493    /// agents passively receive. `Agent` = each targeted agent runs
494    /// its own internal cron and fires locally, so the schedule
495    /// keeps ticking even when the broker is unreachable (laptop on
496    /// the train, broker maintenance window, full WAN outage). The
497    /// two locations are mutually exclusive — when `Agent`, the
498    /// backend scheduler stays out and just keeps the definition in
499    /// KV for agents to read.
500    #[serde(default)]
501    pub runs_on: RunsOn,
502    #[serde(default = "default_true")]
503    pub enabled: bool,
504}
505
506/// v0.23 — where the cron tick fires from.
507#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)]
508#[serde(rename_all = "snake_case")]
509pub enum RunsOn {
510    /// Backend's central scheduler ticks and publishes Commands to
511    /// NATS. Historical default, what every pre-v0.23 schedule
512    /// uses. Agent offline ⇒ Command queued in STREAM_EXEC; agent
513    /// reconnects ⇒ catch-up via [`command_replay`](crate)
514    /// (see kanade-agent's command_replay module).
515    #[default]
516    Backend,
517    /// Each targeted agent runs the cron tick locally. Survives
518    /// broker / WAN outages. Best for laptops / mobile devices that
519    /// roam off the corporate network. Agent must be online for the
520    /// initial schedule + job-catalog pull, but once cached the
521    /// agent fires the script standalone.
522    Agent,
523}
524
525/// Per-pc/per-target dedup semantics for a [`Schedule`] (v0.19).
526#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)]
527#[serde(rename_all = "snake_case")]
528pub enum ExecMode {
529    /// Fire on every cron tick at the whole target. Historical
530    /// (pre-v0.19) behavior; no dedup.
531    #[default]
532    EveryTick,
533    /// Fire at each pc until that pc succeeds; then skip it until
534    /// the optional cooldown elapses (or forever if no cooldown).
535    /// Use for kitting / first-boot / per-pc compliance checks.
536    OncePerPc,
537    /// Fire at the whole target until **any** pc succeeds; then
538    /// skip the whole target until the optional cooldown elapses
539    /// (or forever if no cooldown). Use for "one delegate is
540    /// enough" tasks like license check-in.
541    OncePerTarget,
542}
543
/// Serde default provider for [`Schedule::enabled`]: a schedule whose
/// yaml omits `enabled` is treated as enabled.
fn default_true() -> bool {
    true
}