Skip to main content

kanade_shared/
manifest.rs

1use serde::{Deserialize, Serialize};
2
3use crate::wire::{RunAs, Shell, Staleness};
4
/// YAML job manifest (= registered "what to run", v0.18.0+).
///
/// Owns only script-intrinsic fields. **Who** (`target`), **how to
/// phase fanout** (`rollout`), and **when to stagger start**
/// (`jitter`) all moved to the Schedule / exec request side — same
/// script can now be fired against different targets / rollouts
/// without copying the script body.
///
/// `deny_unknown_fields` makes operators copy-pasting an older yaml
/// that still has `target:` / `rollout:` see a clear parse error at
/// `kanade job create` time instead of mysteriously losing those
/// settings.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(deny_unknown_fields)]
pub struct Manifest {
    /// Job identifier. A [`Schedule`] refers to the job through its
    /// `job_id`, which must equal this value.
    pub id: String,
    /// Free-form version string (e.g. `0.0.1`); required.
    pub version: String,
    /// Optional human-readable description; `None` when omitted.
    #[serde(default)]
    pub description: Option<String>,
    /// Script body plus the settings needed to launch it.
    pub execute: Execute,
    /// Defaults to `false` when omitted.
    /// NOTE(review): the approval workflow itself is not visible in
    /// this file — confirm semantics against the backend.
    #[serde(default)]
    pub require_approval: bool,
    /// Opt-in marker that this job produces a JSON inventory fact
    /// payload on stdout. When present, the backend's results
    /// projector parses `ExecResult.stdout` as JSON and upserts an
    /// `inventory_facts` row keyed by `(pc_id, manifest.id)`. The
    /// `display` sub-config drives the SPA's Inventory page render.
    #[serde(default)]
    pub inventory: Option<InventoryHint>,
    /// v0.26: Layer 2 staleness policy (SPEC.md §2.6.2). Controls
    /// what the agent does at fire time when it can't verify the
    /// `script_current` / `script_status` KV values are fresh —
    /// especially relevant for `runs_on: agent` schedules where
    /// the agent may fire from cache while offline. Defaults to
    /// `Staleness::Cached` (silently use cached values), which
    /// matches every pre-v0.26 Manifest.
    #[serde(default)]
    pub staleness: Staleness,
}
43
/// "Who + how + when-to-stagger" — the fanout-plan side of an exec.
/// Used both as the POST `/api/exec/{job_id}` body and as the embedded
/// `target` / `rollout` / `jitter` slot on [`Schedule`]. Centralising
/// here keeps the validation + serialisation logic in one place.
///
/// Every field is optional on the wire: `target` falls back to
/// [`Target::default`] and the remaining fields to `None`.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct FanoutPlan {
    /// Which PCs the exec is aimed at. Defaults to an empty
    /// [`Target`] (see [`Target::is_specified`]) when omitted.
    #[serde(default)]
    pub target: Target,
    /// Optional wave rollout — when present, the backend publishes
    /// each wave's group subject on its own delay schedule instead
    /// of fanning out the `target` block in one go. `target` then
    /// only labels the deploy for the audit log.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rollout: Option<Rollout>,
    /// Optional humantime jitter; agent uses it to randomise
    /// execution start. Lives here (not on the script) so different
    /// schedules / ad-hoc fires of the same job can pick different
    /// stagger windows.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub jitter: Option<String>,
    /// Absolute time the scheduler stamps on each emitted Command
    /// when this exec was driven by a [`Schedule`] with
    /// `starting_deadline`. Agents receiving a Command after this
    /// instant publish a synthetic skipped-result instead of
    /// running the script. `None` (default) = no deadline / catch
    /// up whenever delivered. Operators don't usually set this
    /// directly — the scheduler computes it from `tick_at +
    /// starting_deadline`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub deadline_at: Option<chrono::DateTime<chrono::Utc>>,
}
75
/// Manifest sub-section: how the SPA should render the inventory
/// facts this job produces. Each [`DisplayField::field`] names a
/// top-level key in the stdout JSON, e.g. `hostname`, `ram_gb`.
///
/// Two render modes:
///   * `display` — vertical "field / value" per PC, used by the
///     `/inventory?pc=<id>` detail view. ALL columns the operator
///     wants visible on the detail page.
///   * `summary` — horizontal table across the fleet (row = PC,
///     column = field) on `/inventory`. Optional; when omitted the
///     SPA falls back to `display`, but operators usually want a
///     trimmer "hostname / OS / CPU / RAM" set for the fleet view.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct InventoryHint {
    /// Detail-view columns, in order. Required.
    pub display: Vec<DisplayField>,
    /// Optional fleet-list columns (row = PC). Defaults to `display`
    /// when omitted, but operators usually pick a 3-5 column subset.
    /// Left out of the serialised form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary: Option<Vec<DisplayField>>,
}
97
/// One rendered column of an [`InventoryHint`]: which stdout JSON key
/// to read, what to call it, and optionally how to format it.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct DisplayField {
    /// Top-level key in the stdout JSON.
    pub field: String,
    /// Human-readable column header.
    pub label: String,
    /// Optional render hint — `"number"`, `"bytes"`, `"timestamp"`.
    /// Defaults to plain text rendering on the SPA side.
    ///
    /// Appears as `type` on the wire; named `kind` here because
    /// `type` is a Rust keyword.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[serde(rename = "type")]
    pub kind: Option<String>,
}
110
/// Phased rollout description carried on a [`FanoutPlan`]: a strategy
/// plus the list of waves to publish.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Rollout {
    /// Rollout strategy; defaults to [`RolloutStrategy::Wave`] when
    /// omitted.
    #[serde(default)]
    pub strategy: RolloutStrategy,
    /// Waves of the rollout. Required (no serde default).
    pub waves: Vec<Wave>,
}
117
/// How a [`Rollout`] is phased. Serialised in lowercase (`wave`).
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum RolloutStrategy {
    /// Wave-by-wave rollout; the only strategy defined here and the
    /// default.
    #[default]
    Wave,
}
124
/// A single wave of a [`Rollout`]: one group plus its start delay.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Wave {
    /// Name of the group this wave is published to.
    pub group: String,
    /// humantime delay measured from the deploy's publish time. wave[0]
    /// typically has "0s"; subsequent waves use minutes / hours.
    pub delay: String,
}
132
/// The "who" of an exec: which PCs a [`FanoutPlan`] is aimed at.
///
/// Every field is optional on the wire, so an empty mapping
/// deserialises to a target that selects nothing; use
/// [`Target::is_specified`] to detect that case.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct Target {
    /// Group names to target (e.g. `canary`). Empty when omitted.
    #[serde(default)]
    pub groups: Vec<String>,
    /// Individual PC ids to target. Empty when omitted.
    #[serde(default)]
    pub pcs: Vec<String>,
    /// Fleet-wide selector flag; `false` when omitted.
    /// NOTE(review): presumably means "every known PC" — confirm
    /// against the fanout code, which is not in this file.
    #[serde(default)]
    pub all: bool,
}
142
143impl Target {
144    /// At least one of all / groups / pcs is set.
145    pub fn is_specified(&self) -> bool {
146        self.all || !self.groups.is_empty() || !self.pcs.is_empty()
147    }
148}
149
/// The `execute:` section of a [`Manifest`]: the script itself plus
/// the settings the agent needs to launch it.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Execute {
    /// Interpreter the script is written for (`powershell` or `cmd`).
    pub shell: ExecuteShell,
    /// The script body.
    pub script: String,
    /// humantime duration string (e.g. "30s", "10m"). Script-intrinsic
    /// — represents how long this script reasonably takes to run.
    pub timeout: String,
    /// Token + session combination the agent uses to launch the
    /// script (v0.21). Default = [`RunAs::System`] (Session 0,
    /// LocalSystem privileges, no GUI) — matches pre-v0.21 behavior.
    #[serde(default)]
    pub run_as: RunAs,
    /// Working directory for the spawned child (v0.21.1). When
    /// unset, the child inherits the agent's cwd — on Windows that
    /// means `%SystemRoot%\System32` for the prod service, which is
    /// almost never what operators actually want. Use an absolute
    /// path; relative paths are passed through to the OS verbatim.
    /// `%PROGRAMDATA%` works for `run_as: system`; for `run_as: user`
    /// you'd want `%USERPROFILE%` (but expansion happens in the
    /// shell, so write `$env:USERPROFILE` for PowerShell, or set
    /// it via teravars before `kanade job create`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cwd: Option<String>,
}
174
/// Shell choices accepted in a manifest's `execute.shell` field.
/// Serialised in lowercase (`powershell`, `cmd`) and convertible into
/// the wire-level [`Shell`] via `From`.
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ExecuteShell {
    /// Written as `powershell` in YAML.
    Powershell,
    /// Written as `cmd` in YAML.
    Cmd,
}
181
182impl From<ExecuteShell> for Shell {
183    fn from(s: ExecuteShell) -> Self {
184        match s {
185            ExecuteShell::Powershell => Shell::Powershell,
186            ExecuteShell::Cmd => Shell::Cmd,
187        }
188    }
189}
190
// Unit tests for the manifest / schedule wire types: YAML + JSON
// (de)serialisation, serde defaults, and the small helper impls.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn target_is_specified_requires_at_least_one_field() {
        let empty = Target::default();
        assert!(!empty.is_specified());

        let with_all = Target {
            all: true,
            ..Target::default()
        };
        assert!(with_all.is_specified());

        let with_groups = Target {
            groups: vec!["canary".into()],
            ..Target::default()
        };
        assert!(with_groups.is_specified());

        let with_pcs = Target {
            pcs: vec!["minipc".into()],
            ..Target::default()
        };
        assert!(with_pcs.is_specified());
    }

    #[test]
    fn manifest_deserialises_minimal_yaml() {
        // Matches jobs/echo-test.yaml. v0.18: no target/rollout/jitter
        // — those live on the schedule / exec request now.
        let yaml = r#"
id: echo-test
version: 0.0.1
execute:
  shell: powershell
  script: "echo 'kanade'"
  timeout: 30s
"#;
        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(m.id, "echo-test");
        assert_eq!(m.version, "0.0.1");
        assert!(matches!(m.execute.shell, ExecuteShell::Powershell));
        assert_eq!(m.execute.script.trim(), "echo 'kanade'");
        assert_eq!(m.execute.timeout, "30s");
        assert!(!m.require_approval);
    }

    #[test]
    fn schedule_carries_target_and_rollout() {
        let yaml = r#"
id: hourly-cleanup-canary
cron: "0 0 * * * *"
job_id: cleanup
enabled: true
target:
  groups: [canary, wave1]
jitter: 30s
rollout:
  strategy: wave
  waves:
    - { group: canary, delay: 0s }
    - { group: wave1,  delay: 5s }
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.id, "hourly-cleanup-canary");
        assert_eq!(s.job_id, "cleanup");
        assert_eq!(s.plan.target.groups, vec!["canary", "wave1"]);
        assert_eq!(s.plan.jitter.as_deref(), Some("30s"));
        let rollout = s.plan.rollout.expect("rollout present");
        assert_eq!(rollout.waves.len(), 2);
        assert_eq!(rollout.waves[0].group, "canary");
        assert_eq!(rollout.waves[1].delay, "5s");
        assert_eq!(rollout.strategy, RolloutStrategy::Wave);
    }

    #[test]
    fn schedule_minimal_target_all() {
        let yaml = r#"
id: every-10s
cron: "*/10 * * * * *"
enabled: true
job_id: scheduled-echo
target: { all: true }
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.id, "every-10s");
        assert_eq!(s.cron, "*/10 * * * * *");
        assert!(s.enabled);
        assert_eq!(s.job_id, "scheduled-echo");
        assert!(s.plan.target.all);
        assert!(s.plan.rollout.is_none());
        assert!(s.plan.jitter.is_none());
    }

    #[test]
    fn schedule_enabled_defaults_to_true() {
        let yaml = r#"
id: x
cron: "* * * * * *"
job_id: y
target: { all: true }
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert!(s.enabled);
    }

    #[test]
    fn schedule_mode_defaults_to_every_tick() {
        let yaml = r#"
id: x
cron: "* * * * * *"
job_id: y
target: { all: true }
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.mode, ExecMode::EveryTick);
        assert!(s.cooldown.is_none());
        assert!(!s.auto_disable_when_done);
    }

    #[test]
    fn schedule_mode_serialises_snake_case() {
        for (mode, expected) in [
            (ExecMode::EveryTick, "every_tick"),
            (ExecMode::OncePerPc, "once_per_pc"),
            (ExecMode::OncePerTarget, "once_per_target"),
        ] {
            let s = serde_json::to_value(mode).expect("serialise");
            assert_eq!(s, serde_json::Value::String(expected.into()));
            let back: ExecMode = serde_json::from_value(serde_json::Value::String(expected.into()))
                .expect("deserialise");
            assert_eq!(back, mode, "round-trip for {expected}");
        }
    }

    #[test]
    fn schedule_kitting_yaml_parses() {
        let yaml = r#"
id: kitting-setup
cron: "*/30 * * * * *"
job_id: install-baseline
target: { all: true }
mode: once_per_pc
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.mode, ExecMode::OncePerPc);
        assert!(s.cooldown.is_none());
        assert!(!s.auto_disable_when_done);
    }

    #[test]
    fn schedule_batch_campaign_yaml_parses() {
        let yaml = r#"
id: q3-patch-batch
cron: "*/5 * * * * *"
job_id: install-patch
target:
  pcs: [pc-001, pc-002, pc-003]
mode: once_per_pc
auto_disable_when_done: true
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.mode, ExecMode::OncePerPc);
        assert!(s.cooldown.is_none());
        assert!(s.auto_disable_when_done);
        assert_eq!(s.plan.target.pcs.len(), 3);
    }

    #[test]
    fn schedule_throttled_yaml_parses() {
        let yaml = r#"
id: daily-compliance
cron: "*/5 * * * * *"
job_id: check-av-status
target: { all: true }
mode: once_per_pc
cooldown: 1d
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.mode, ExecMode::OncePerPc);
        assert_eq!(s.cooldown.as_deref(), Some("1d"));
    }

    #[test]
    fn schedule_runs_on_defaults_to_backend() {
        let yaml = r#"
id: x
cron: "* * * * * *"
job_id: y
target: { all: true }
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.runs_on, RunsOn::Backend);
    }

    #[test]
    fn schedule_runs_on_agent_parses() {
        let yaml = r#"
id: offline-inv
cron: "0 0 * * * *"
job_id: inventory-hw
target: { all: true }
runs_on: agent
mode: once_per_pc
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.runs_on, RunsOn::Agent);
        assert_eq!(s.mode, ExecMode::OncePerPc);
    }

    #[test]
    fn runs_on_serialises_snake_case() {
        for (mode, expected) in [(RunsOn::Backend, "backend"), (RunsOn::Agent, "agent")] {
            let s = serde_json::to_value(mode).expect("serialise");
            assert_eq!(s, serde_json::Value::String(expected.into()));
            let back: RunsOn = serde_json::from_value(serde_json::Value::String(expected.into()))
                .expect("deserialise");
            assert_eq!(back, mode);
        }
    }

    #[test]
    fn schedule_once_per_target_yaml_parses() {
        let yaml = r#"
id: license-checkin
cron: "*/10 * * * * *"
job_id: hit-license-server
target: { all: true }
mode: once_per_target
cooldown: 24h
"#;
        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(s.mode, ExecMode::OncePerTarget);
        assert_eq!(s.cooldown.as_deref(), Some("24h"));
    }

    #[test]
    fn execute_shell_into_wire_shell() {
        assert_eq!(Shell::from(ExecuteShell::Powershell), Shell::Powershell);
        assert_eq!(Shell::from(ExecuteShell::Cmd), Shell::Cmd);
    }

    #[test]
    fn manifest_staleness_defaults_to_cached() {
        let yaml = r#"
id: x
version: 1.0.0
execute:
  shell: powershell
  script: "echo"
  timeout: 1s
"#;
        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(m.staleness, Staleness::Cached);
    }

    #[test]
    fn manifest_strict_staleness_parses() {
        let yaml = r#"
id: urgent-patch
version: 2.5.1
execute:
  shell: powershell
  script: Install-Hotfix
  timeout: 5m
staleness:
  mode: strict
  max_cache_age: 0s
"#;
        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
        match m.staleness {
            Staleness::Strict { max_cache_age } => assert_eq!(max_cache_age, "0s"),
            other => panic!("expected strict, got {other:?}"),
        }
    }

    #[test]
    fn manifest_unchecked_staleness_parses() {
        let yaml = r#"
id: legacy
version: 0.1.0
execute:
  shell: cmd
  script: "echo"
  timeout: 1s
staleness:
  mode: unchecked
"#;
        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(m.staleness, Staleness::Unchecked);
    }

    #[test]
    fn missing_required_field_errors() {
        // `id` missing — and nothing else wrong. The yaml must not
        // carry any key `deny_unknown_fields` would reject (such as a
        // legacy `target:`), otherwise the parse fails on that key and
        // this test would keep passing even if `id` became optional.
        let yaml = r#"
version: 1.0.0
execute:
  shell: powershell
  script: "echo"
  timeout: 1s
"#;
        let err = serde_yaml::from_str::<Manifest>(yaml)
            .expect_err("manifest without `id` must not parse");
        let msg = err.to_string();
        assert!(
            msg.contains("missing field `id`"),
            "expected a missing-`id` error, got: {msg}"
        );
    }

    #[test]
    fn manifest_rejects_legacy_fanout_fields() {
        // Pre-v0.18 manifests carried `target:` / `rollout:` inline.
        // `deny_unknown_fields` must surface those as a parse error
        // rather than silently dropping them.
        let with_target = r#"
id: legacy-job
version: 1.0.0
target: { all: true }
execute:
  shell: powershell
  script: "echo"
  timeout: 1s
"#;
        let err = serde_yaml::from_str::<Manifest>(with_target)
            .expect_err("legacy `target:` must be rejected");
        let msg = err.to_string();
        assert!(
            msg.contains("unknown field `target`"),
            "expected an unknown-field error for `target`, got: {msg}"
        );

        let with_rollout = r#"
id: legacy-job
version: 1.0.0
rollout:
  waves:
    - { group: canary, delay: 0s }
execute:
  shell: powershell
  script: "echo"
  timeout: 1s
"#;
        let err = serde_yaml::from_str::<Manifest>(with_rollout)
            .expect_err("legacy `rollout:` must be rejected");
        let msg = err.to_string();
        assert!(
            msg.contains("unknown field `rollout`"),
            "expected an unknown-field error for `rollout`, got: {msg}"
        );
    }
}
500
/// Periodic schedule (spec §2.4.3). v0.18.0 carries the fanout plan
/// (target + optional rollout + optional jitter) inline; the
/// referenced job (`job_id` → [`crate::kv::BUCKET_JOBS`]) supplies
/// only the script body. Two schedules of the same job can target
/// different groups on different cadences without copying the manifest.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Schedule {
    /// Identifier of this schedule (distinct from `job_id`).
    pub id: String,
    /// 6-field cron expression (`sec min hour day month day-of-week`),
    /// matching `tokio-cron-scheduler` syntax.
    pub cron: String,
    /// Key into [`crate::kv::BUCKET_JOBS`]. Must equal a registered
    /// Manifest's `id`.
    pub job_id: String,
    /// Who + how-to-phase + when-to-stagger. The Manifest doesn't
    /// carry these any more — same job + different fanout = different
    /// schedule.
    ///
    /// Flattened: `target` / `rollout` / `jitter` / `deadline_at`
    /// appear as top-level keys of the schedule document rather than
    /// under a nested `plan:` key.
    #[serde(flatten)]
    pub plan: FanoutPlan,
    /// Per-pc/per-target dedup semantics (v0.19). Default
    /// `EveryTick` keeps the historical "fire every cron tick at the
    /// whole target" behavior.
    #[serde(default)]
    pub mode: ExecMode,
    /// Humantime cooldown for `OncePerPc` / `OncePerTarget`. Once a
    /// pc/target has succeeded, the scheduler waits this long before
    /// considering it eligible again. Omit for "succeed once, then
    /// permanently skip" — i.e. cooldown = infinity.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cooldown: Option<String>,
    /// When true AND the schedule's lifecycle is permanently
    /// terminated (`cooldown = None` + dedup says nothing more to
    /// do), the scheduler flips `enabled = false` and emits an
    /// audit event. No-op when `cooldown` is set (re-arming
    /// schedules never finish).
    #[serde(default)]
    pub auto_disable_when_done: bool,
    /// v0.22: optional humantime window after a cron tick during
    /// which the Command is still considered "live". The scheduler
    /// computes `tick_at + starting_deadline` and stamps it onto
    /// each Command as `deadline_at`; agents skip Commands they
    /// receive after that absolute time. `None` (default) = no
    /// deadline, meaning a Command queued in the broker / stream
    /// during agent downtime runs whenever the agent reconnects —
    /// good for kitting / inventory / cleanup. Set this for
    /// time-of-day notifications, lunch reminders, etc., where
    /// "fire 3 hours late" would be wrong.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub starting_deadline: Option<String>,
    /// v0.23: where does the cron tick happen? `Backend` (default,
    /// historical) = backend's scheduler fires Commands via NATS;
    /// agents passively receive. `Agent` = each targeted agent runs
    /// its own internal cron and fires locally, so the schedule
    /// keeps ticking even when the broker is unreachable (laptop on
    /// the train, broker maintenance window, full WAN outage). The
    /// two locations are mutually exclusive — when `Agent`, the
    /// backend scheduler stays out and just keeps the definition in
    /// KV for agents to read.
    #[serde(default)]
    pub runs_on: RunsOn,
    /// Whether the schedule is active. Defaults to `true` when the
    /// key is omitted (via `default_true`), so a schedule has to be
    /// switched off explicitly.
    #[serde(default = "default_true")]
    pub enabled: bool,
}
564
/// v0.23 — where the cron tick fires from. Serialised in snake_case
/// (`backend`, `agent`).
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum RunsOn {
    /// Backend's central scheduler ticks and publishes Commands to
    /// NATS. Historical default, what every pre-v0.23 schedule
    /// uses. Agent offline ⇒ Command queued in STREAM_EXEC; agent
    /// reconnects ⇒ catch-up via command replay (see kanade-agent's
    /// `command_replay` module).
    #[default]
    Backend,
    /// Each targeted agent runs the cron tick locally. Survives
    /// broker / WAN outages. Best for laptops / mobile devices that
    /// roam off the corporate network. Agent must be online for the
    /// initial schedule + job-catalog pull, but once cached the
    /// agent fires the script standalone.
    Agent,
}
583
/// Per-pc/per-target dedup semantics for a [`Schedule`] (v0.19).
/// Serialised in snake_case (`every_tick`, `once_per_pc`,
/// `once_per_target`).
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ExecMode {
    /// Fire on every cron tick at the whole target. Historical
    /// (pre-v0.19) behavior; no dedup.
    #[default]
    EveryTick,
    /// Fire at each pc until that pc succeeds; then skip it until
    /// the optional cooldown elapses (or forever if no cooldown).
    /// Use for kitting / first-boot / per-pc compliance checks.
    OncePerPc,
    /// Fire at the whole target until **any** pc succeeds; then
    /// skip the whole target until the optional cooldown elapses
    /// (or forever if no cooldown). Use for "one delegate is
    /// enough" tasks like license check-in.
    OncePerTarget,
}
602
/// Serde default provider that always yields `true`. Referenced by
/// name from `#[serde(default = "default_true")]` on
/// `Schedule::enabled`, because a bare `#[serde(default)]` on a
/// `bool` would produce `false`.
fn default_true() -> bool {
    true
}