Skip to main content

kanade_shared/
manifest.rs

1use serde::{Deserialize, Serialize};
2
3use crate::ipc::jobs::JobCategory;
4use crate::wire::{RunAs, Shell, Staleness};
5
6/// YAML job manifest (= registered "what to run", v0.18.0+).
7///
8/// Owns only script-intrinsic fields. **Who** (`target`), **how to
9/// phase fanout** (`rollout`), and **when to stagger start**
10/// (`jitter`) all moved to the Schedule / exec request side — same
11/// script can now be fired against different targets / rollouts
12/// without copying the script body.
13///
14/// `deny_unknown_fields` makes operators copy-pasting an older yaml
15/// that still has `target:` / `rollout:` see a clear parse error at
16/// `kanade job create` time instead of mysteriously losing it.
17#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
18#[serde(deny_unknown_fields)]
19pub struct Manifest {
20    pub id: String,
21    pub version: String,
22    #[serde(default)]
23    pub description: Option<String>,
24    pub execute: Execute,
25    #[serde(default)]
26    pub require_approval: bool,
27    /// Opt-in marker that this job produces a JSON inventory fact
28    /// payload on stdout. When present, the backend's results
29    /// projector parses `ExecResult.stdout` as JSON and upserts an
30    /// `inventory_facts` row keyed by `(pc_id, manifest.id)`. The
31    /// `display` sub-config drives the SPA's Inventory page render.
32    #[serde(default)]
33    pub inventory: Option<InventoryHint>,
34    /// Issue #246: opt-in marker that this job emits per-line
35    /// observability events on stdout (one JSON `ObsEvent` per
36    /// newline). When present, the agent — after the script exits
37    /// successfully — parses each non-empty stdout line as an
38    /// `ObsEvent`, publishes it on `obs.<pc_id>` via the
39    /// `obs_outbox`, and (intentionally) **omits the stdout from
40    /// the `ExecResult`** so the timeline data doesn't double up
41    /// in `execution_results.stdout` (which would multiply rows
42    /// by ~50/day/PC of noise).
43    ///
44    /// Distinct from `inventory:` (single JSON object → projector
45    /// upsert) — events are append-only timeline points consumed
46    /// by the dedicated `obs_events` table.
47    #[serde(default)]
48    pub emit: Option<EmitConfig>,
49    /// #290: opt-in marker that this job is an operator-defined
50    /// **health check** whose result feeds the Client App's Health
51    /// tab over KLP (`StateSnapshot.checks`). The script prints a
52    /// free-form JSON object on stdout (like any inventory job); the
53    /// agent reads the [`CheckHint::status_field`] value dynamically
54    /// into a [`crate::ipc::state::Check`] named `check.name`.
55    /// Cadence / windows / conditions come from
56    /// the job's Schedule (exactly like inventory) — there is
57    /// deliberately no interval here. **Composes with `inventory:`**:
58    /// the script's stdout is one JSON object, so a check can also
59    /// carry an `inventory:` block to project the rest of that object
60    /// (incl. `explode` sub-tables) for SPA fleet-querying. Only
61    /// `emit:` (NDJSON stdout) is incompatible.
62    #[serde(default)]
63    pub check: Option<CheckHint>,
64    /// v0.26: Layer 2 staleness policy (SPEC.md §2.6.2). Controls
65    /// what the agent does at fire time when it can't verify the
66    /// `script_current` / `script_status` KV values are fresh —
67    /// especially relevant for `runs_on: agent` schedules where
68    /// the agent may fire from cache while offline. Defaults to
69    /// `Staleness::Cached` (silently use cached values), which
70    /// matches every pre-v0.26 Manifest.
71    #[serde(default)]
72    pub staleness: Staleness,
73    /// #291: opt-in marker that this job is offered to **end users**
74    /// in the Client App's job tabs over KLP (`jobs.list` →
75    /// `jobs.execute`). Parallel to [`inventory`] / [`check`] /
76    /// [`emit`]: the block's mere presence is the opt-in, and it
77    /// groups the end-user presentation fields (name / category /
78    /// icon) that only make sense for a user-facing job. `None`
79    /// (the default) ⇒ an operator-only job — inventory, checks,
80    /// scheduled maintenance — that never surfaces in the catalog.
81    ///
82    /// The agent re-reads this at every `jobs.list` / `jobs.execute`
83    /// (SPEC §2.1), so removing the block takes a job out of a
84    /// running client on its next action.
85    ///
86    /// [`inventory`]: Manifest::inventory
87    /// [`check`]: Manifest::check
88    /// [`emit`]: Manifest::emit
89    #[serde(default, skip_serializing_if = "Option::is_none")]
90    pub client: Option<ClientHint>,
91}
92
93/// "Who + how + when-to-stagger" — the fanout-plan side of an exec.
94/// Used both as the POST `/api/exec/{job_id}` body and as the embedded
95/// `target` / `rollout` / `jitter` slot on [`Schedule`]. Centralising
96/// here keeps the validation + serialisation logic in one place.
97#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Default)]
98pub struct FanoutPlan {
99    #[serde(default)]
100    pub target: Target,
101    /// Optional wave rollout — when present, the backend publishes
102    /// each wave's group subject on its own delay schedule instead
103    /// of fanning out the `target` block in one go. `target` then
104    /// only labels the deploy for the audit log.
105    #[serde(default, skip_serializing_if = "Option::is_none")]
106    pub rollout: Option<Rollout>,
107    /// Optional humantime jitter; agent uses it to randomise
108    /// execution start. Lives here (not on the script) so different
109    /// schedules / ad-hoc fires of the same job can pick different
110    /// stagger windows.
111    #[serde(default, skip_serializing_if = "Option::is_none")]
112    pub jitter: Option<String>,
113    /// Absolute time the scheduler stamps on each emitted Command
114    /// when this exec was driven by a [`Schedule`] with
115    /// `starting_deadline`. Agents receiving a Command after this
116    /// instant publish a synthetic skipped-result instead of
117    /// running the script. `None` (default) = no deadline / catch
118    /// up whenever delivered. Operators don't usually set this
119    /// directly — the scheduler computes it from `tick_at +
120    /// starting_deadline`.
121    #[serde(default, skip_serializing_if = "Option::is_none")]
122    pub deadline_at: Option<chrono::DateTime<chrono::Utc>>,
123}
124
125/// Manifest sub-section: how the SPA should render the inventory
126/// facts this job produces. Each field name (`field`) is a top-level
127/// key in the stdout JSON, e.g. `hostname`, `ram_gb`.
128///
129/// Two render modes:
130///   * `display` — vertical "field / value" per PC, used by the
131///     `/inventory?pc=<id>` detail view. ALL columns the operator
132///     wants visible on the detail page.
133///   * `summary` — horizontal table across the fleet (row = PC,
134///     column = field) on `/inventory`. Optional; when omitted the
135///     SPA falls back to `display`, but operators usually want a
136///     trimmer "hostname / OS / CPU / RAM" set for the fleet view.
137#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
138pub struct InventoryHint {
139    /// Detail-view columns, in order.
140    pub display: Vec<DisplayField>,
141    /// Optional fleet-list columns (row = PC). Defaults to `display`
142    /// when omitted, but operators usually pick a 3-5 column subset.
143    #[serde(default, skip_serializing_if = "Option::is_none")]
144    pub summary: Option<Vec<DisplayField>>,
145    /// v0.31 / #40: payload arrays that should be exploded into
146    /// per-element rows of a derived SQLite table. Lets operators
147    /// answer cross-PC questions ("which PCs still have Chrome <
148    /// 120?", "C: >90% full") with normal SQL filters + indexes
149    /// instead of grepping JSON. The projector creates the derived
150    /// table on register and replaces this PC's rows on each result
151    /// (DELETE WHERE pc_id=? AND job_id=? + bulk INSERT). See
152    /// [`ExplodeSpec`] for the per-spec schema.
153    #[serde(default, skip_serializing_if = "Option::is_none")]
154    pub explode: Option<Vec<ExplodeSpec>>,
155    /// v0.35 / #93: top-level scalar fields whose changes the
156    /// projector logs to `inventory_history` (one event per
157    /// changed field per scan). Pairs with `explode[].track_history`
158    /// — that covers array elements; this covers single-valued
159    /// fields like `ram_bytes` / `os_version` / `cpu_model` /
160    /// `os_build` that operators want to track for "did the RAM
161    /// get upgraded?" / "when did Win 11 land on this PC?" /
162    /// "BIOS / firmware bumped?" questions. Field name = `field_path`
163    /// in the history row, `identity_json` is NULL, `before_json`
164    /// / `after_json` each carry `{"value": <prior or new value>}`.
165    /// First-ever observation of a scalar (no prior facts row)
166    /// emits `added`; subsequent value changes emit `changed`. No
167    /// `removed` events — a scalar disappearing from the payload
168    /// is rare and the operator can still see the last value via
169    /// the `before_json` of the most recent change.
170    #[serde(default, skip_serializing_if = "Option::is_none")]
171    pub history_scalars: Option<Vec<String>>,
172}
173
174/// Manifest sub-section (#290): marks a job as an operator-defined
175/// **health check**. Parallel to [`InventoryHint`] / `EmitConfig`.
176/// The stdout contract is a free-form JSON object (same as any
177/// inventory job) from which the agent reads `status_field` /
178/// `detail_field` to build the KLP [`crate::ipc::state::Check`] shown
179/// on the Client App's Health tab.
180///
181/// There is deliberately **no timing field** — when / how often /
182/// in which window a check runs is driven by the job's Schedule,
183/// exactly like inventory jobs, so operators get the full `when:` /
184/// rollout / `runs_on` expressiveness for free.
185///
186/// A check's stdout is a **free-form inventory object** (arbitrary
187/// key/value pairs + arrays) — same as any inventory job — that also
188/// carries a status field. `check:` adds only the health semantics on
189/// top: which field is the ok/warn/fail/unknown status, an optional
190/// one-line summary field, and a remediation job. Everything else
191/// (rich per-PC detail, `explode` sub-tables like a software list) is
192/// driven by a co-present [`InventoryHint`] and rendered with the
193/// SAME display logic the SPA Inventory page uses — on the Client App
194/// too. This keeps checks maximally expressive without a bespoke
195/// payload type.
196#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
197#[serde(deny_unknown_fields)]
198pub struct CheckHint {
199    /// Stable check id → [`Check.name`](crate::ipc::state::Check),
200    /// the SPA/Client React key + analytics label. Unique within the
201    /// fleet's check set.
202    pub name: String,
203    /// Top-level stdout field whose string value
204    /// (`ok`/`warn`/`fail`/`unknown`) becomes the Health-tab light
205    /// ([`CheckStatus`](crate::ipc::state::CheckStatus)). Defaults to
206    /// `"status"`; a missing / unparseable value → `unknown`.
207    #[serde(default = "default_status_field")]
208    pub status_field: String,
209    /// Top-level stdout field used as the Health-tab row's one-line
210    /// summary. Defaults to `"detail"`; absent in the payload → no
211    /// detail line (the rich breakdown lives in the inventory view).
212    #[serde(default = "default_detail_field")]
213    pub detail_field: String,
214    /// Optional remediation job id →
215    /// [`Check.troubleshoot`](crate::ipc::state::Check). The Client
216    /// App shows a "修復する" button when present; that job must be
217    /// `user_invokable`.
218    #[serde(default, skip_serializing_if = "Option::is_none")]
219    pub troubleshoot: Option<String>,
220    /// #290 PR-E: when `true` (default), the backend also projects this
221    /// check's `status` / `detail` into the `check_status` table so the
222    /// operator SPA gets a fleet-wide compliance view for free — no
223    /// `inventory:` block needed. Set `fleet: false` for a client-only
224    /// check the operator doesn't want surfaced across the fleet.
225    #[serde(default = "default_fleet")]
226    pub fleet: bool,
227}
228
/// Serde default for `CheckHint::status_field`: the stdout key `"status"`.
fn default_status_field() -> String {
    String::from("status")
}
232
/// Serde default for `CheckHint::detail_field`: the stdout key `"detail"`.
fn default_detail_field() -> String {
    String::from("detail")
}
236
/// Serde default for `CheckHint::fleet`: fleet-wide projection is on unless
/// the manifest explicitly sets `fleet: false`.
fn default_fleet() -> bool {
    true
}
240
241/// Manifest sub-section (#291): marks a job as **user-invokable**
242/// from the Client App and carries how it presents to the end user.
243/// Parallel to [`InventoryHint`] / [`CheckHint`] / `EmitConfig` —
244/// the block's presence is the opt-in (no separate boolean), and its
245/// required fields (`name`, `category`) are enforced by serde at
246/// parse time, so a half-filled catalog entry fails
247/// `kanade job create` instead of rendering a nameless / tab-less row.
248///
249/// The agent maps this 1:1 into the KLP
250/// [`UserInvokableJob`](crate::ipc::jobs::UserInvokableJob) wire shape
251/// that `jobs.list` returns; the Client App renders one row per job in
252/// the tab named by `category`.
253#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
254#[serde(deny_unknown_fields)]
255pub struct ClientHint {
256    /// End-user-facing title for the job row. The operator-internal
257    /// `Manifest::id` slug is rarely what an end user should read, so
258    /// this is required (and validated non-empty by
259    /// [`Manifest::validate`]). Maps to `UserInvokableJob::display_name`.
260    pub name: String,
261    /// Optional one-line subtitle under `name` in the Client App.
262    /// Distinct from the operator-facing top-level
263    /// [`Manifest::description`] — this one is written for the end
264    /// user. Maps to `UserInvokableJob::display_description`.
265    #[serde(default, skip_serializing_if = "Option::is_none")]
266    pub description: Option<String>,
267    /// Which Client App tab the job lives in (`software_update` →
268    /// アップデート, `troubleshoot` → 困ったとき, `catalog` → software
269    /// catalog). Required — without it the agent can't place the job
270    /// in a tab.
271    pub category: JobCategory,
272    /// Optional icon hint for the job row — a lucide-react icon name
273    /// or a `data:` URL. `None` ⇒ the Client App falls back to the
274    /// category's default icon. Surfaced verbatim in
275    /// `jobs.list[].icon`.
276    #[serde(default, skip_serializing_if = "Option::is_none")]
277    pub icon: Option<String>,
278}
279
280/// Issue #246 — `emit:` manifest block for jobs whose stdout is
281/// NDJSON observability events (one `ObsEvent` per line). Parallel
282/// to `inventory:` but for the append-only timeline pipeline; see
283/// `Manifest::emit` for the full contract.
284#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
285#[serde(deny_unknown_fields)]
286pub struct EmitConfig {
287    /// What kind of payload the agent should expect on stdout. Only
288    /// `events` is defined today (parses each non-empty line as
289    /// `ObsEvent` and publishes on `obs.<pc_id>`); future variants
290    /// (e.g. metrics streams, structured trace events) plug in here.
291    #[serde(rename = "type")]
292    pub kind: EmitKind,
293    /// Operator hint for where the script keeps its own state — the
294    /// watermark file the PowerShell / sh body reads + writes
295    /// between runs so it only emits NEW events since the last
296    /// poll. The agent doesn't read this; it's documentation that
297    /// the SPA (and `kanade job edit`) can surface to operators
298    /// reviewing the manifest. Optional; the script is allowed to
299    /// keep state anywhere (registry, env, etc.) — the field's
300    /// presence makes the convention discoverable.
301    #[serde(default, skip_serializing_if = "Option::is_none")]
302    pub watermark_path: Option<String>,
303}
304
305/// `emit.type` enum. Lowercase serde so manifests read
306/// `type: events` rather than `Events`.
307#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Copy, PartialEq, Eq)]
308#[serde(rename_all = "lowercase")]
309pub enum EmitKind {
310    /// Per-line `ObsEvent` JSON. Agent parses + publishes on
311    /// `obs.<pc_id>`, drops the stdout from the resulting
312    /// `ExecResult`.
313    Events,
314}
315
316/// v0.31 / #40: declarative "flatten this JSON array into a real
317/// SQLite table" spec on an inventory manifest. The projector
318/// creates the table on first registration (CREATE TABLE IF NOT
319/// EXISTS + indexes) and writes a row per element of
320/// `payload[field]` on every result, scoped by (pc_id, job_id) so
321/// each PC's rows replace cleanly without a per-PC schema.
322#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
323pub struct ExplodeSpec {
324    /// JSON array key under the payload to explode. E.g. `"apps"`
325    /// for `payload: { apps: [{...}, {...}] }`.
326    pub field: String,
327    /// Derived SQLite table name. Operators choose this — pick
328    /// something namespaced + stable (`inventory_sw_apps`, not
329    /// `apps`) so multiple inventory manifests don't collide on a
330    /// generic name.
331    pub table: String,
332    /// Element-level fields that uniquely identify a row inside one
333    /// PC's payload. The full PK is `(pc_id, job_id) + these
334    /// columns`. Required — operators must think about uniqueness
335    /// (e.g. `["name", "source"]` for installed apps because the
336    /// same name appears in multiple uninstall hives).
337    ///
338    /// v0.31 / #41: same tuple drives history identity. When
339    /// `track_history` is on, the projector serialises these
340    /// fields' values into `inventory_history.identity_json` for
341    /// every change event, so queries like "every PC that ever
342    /// installed Chrome (any source)" filter on identity_json
343    /// content without a per-manifest schema.
344    pub primary_key: Vec<String>,
345    /// Per-element fields that become columns in the derived table.
346    pub columns: Vec<ExplodeColumn>,
347    /// v0.31 / #41: when true (default false), the projector
348    /// diffs each PC's incoming payload against the prior rows
349    /// for the same (pc_id, job_id) BEFORE the DELETE-then-INSERT
350    /// replace, and writes added / removed / changed events into
351    /// `inventory_history`. Lets operators answer time-dimension
352    /// questions ("when did Chrome 120 first appear on PC X?",
353    /// "what's the Win 11 23H2 rollout curve") without storing
354    /// per-scan snapshots. Off by default so operators opt in
355    /// per-spec — history has a real storage cost on long-lived
356    /// deployments (mitigated by the 90-day default retention
357    /// sweeper, see `cleanup` module).
358    #[serde(default)]
359    pub track_history: bool,
360}
361
362/// One column in an [`ExplodeSpec`]'s derived table.
363#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
364pub struct ExplodeColumn {
365    /// JSON key under each array element. Becomes the column name
366    /// in the derived SQLite table — we don't rename.
367    pub field: String,
368    /// SQLite affinity: `"text"` (default), `"integer"`, `"real"`.
369    /// Storage maps directly via `sqlx::query.bind(...)`; type
370    /// mismatches at INSERT-time fail loudly rather than silently
371    /// dropping the row.
372    #[serde(default, skip_serializing_if = "Option::is_none")]
373    #[serde(rename = "type")]
374    pub kind: Option<String>,
375    /// When true, the projector creates a `CREATE INDEX` on this
376    /// column at table-creation time. Boost for the common-filter
377    /// columns (`name`, `version`) — operators mark them
378    /// explicitly, the projector won't guess.
379    #[serde(default)]
380    pub index: bool,
381}
382
383#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
384pub struct DisplayField {
385    /// Top-level key in the stdout JSON.
386    pub field: String,
387    /// Human-readable column header.
388    pub label: String,
389    /// Optional render hint — `"number"`, `"bytes"`, `"timestamp"`,
390    /// or `"table"` (#39). Defaults to plain text rendering on the
391    /// SPA side. `"table"` expects the field's value to be a JSON
392    /// array of objects and renders a nested sub-table on the
393    /// per-PC detail page using `columns` as the schema; the fleet
394    /// summary view falls back to showing the row count for
395    /// `"table"` cells so the wide list stays compact.
396    #[serde(default, skip_serializing_if = "Option::is_none")]
397    #[serde(rename = "type")]
398    pub kind: Option<String>,
399    /// v0.30 / #39: when `kind == "table"`, the SPA renders the
400    /// field's value (an array of objects like
401    /// `disks: [{ device_id, size_bytes, ... }]`) as a nested
402    /// sub-table using these columns. Each column is itself a
403    /// `DisplayField`, so the nested cells reuse the same render
404    /// hints (`bytes`, `number`, `timestamp`) — no parallel format
405    /// pipeline. Ignored for any other `kind`.
406    #[serde(default, skip_serializing_if = "Option::is_none")]
407    pub columns: Option<Vec<DisplayField>>,
408}
409
410#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
411pub struct Rollout {
412    #[serde(default)]
413    pub strategy: RolloutStrategy,
414    pub waves: Vec<Wave>,
415}
416
417#[derive(
418    Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Copy, PartialEq, Eq, Default,
419)]
420#[serde(rename_all = "lowercase")]
421pub enum RolloutStrategy {
422    #[default]
423    Wave,
424}
425
426#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
427pub struct Wave {
428    pub group: String,
429    /// humantime delay measured from the deploy's publish time. wave[0]
430    /// typically has "0s"; subsequent waves use minutes / hours.
431    pub delay: String,
432}
433
434#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Default)]
435pub struct Target {
436    #[serde(default)]
437    pub groups: Vec<String>,
438    #[serde(default)]
439    pub pcs: Vec<String>,
440    #[serde(default)]
441    pub all: bool,
442}
443
444impl Target {
445    /// At least one of all / groups / pcs is set.
446    pub fn is_specified(&self) -> bool {
447        self.all || !self.groups.is_empty() || !self.pcs.is_empty()
448    }
449}
450
451#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
452#[serde(deny_unknown_fields)]
453pub struct Execute {
454    pub shell: ExecuteShell,
455    /// Inline script body. Mutually exclusive with [`script_file`]
456    /// and [`script_object`]; exactly one of the three must be set
457    /// (enforced by [`Execute::validate_script_source`] at the
458    /// write-side parse boundaries — `kanade job create` and
459    /// `POST /api/jobs`).
460    ///
461    /// Empty string is treated as **unset** so operators can swap
462    /// to a `script_file:` / `script_object:` alternative just by
463    /// commenting out the body, without having to also drop the
464    /// `script:` key entirely.
465    ///
466    /// [`script_file`]: Self::script_file
467    /// [`script_object`]: Self::script_object
468    #[serde(default, skip_serializing_if = "Option::is_none")]
469    pub script: Option<String>,
470    /// Repo-local file path resolved by the operator-side CLI at
471    /// `kanade job create` time. The CLI reads the file, slots its
472    /// contents into `script`, and clears this field before
473    /// POSTing — so the backend / agents never see `script_file`
474    /// in stored manifests. SPEC §2.4.1.
475    ///
476    /// Resolver lands in a follow-up PR
477    /// (yukimemi/kanade#210); today this field passes parse-time
478    /// validation but the operator-side CLI bails with "not yet
479    /// implemented" until the resolver ships, so manifests that
480    /// reach the backend with `script_file` set are treated as a
481    /// schema-bug.
482    #[serde(default, skip_serializing_if = "Option::is_none")]
483    pub script_file: Option<String>,
484    /// Object Store reference (`<name>/<version>`) into the
485    /// `scripts` bucket (`OBJECT_SCRIPTS`). Agents fetch the body
486    /// at Execute time via `/api/script-objects/{name}/{version}`
487    /// and cache it locally. SPEC §2.4.1.
488    ///
489    /// Resolver lands in the same follow-up PR as `script_file`;
490    /// today this field passes parse-time validation but the
491    /// backend / agent exec paths bail with "not yet implemented"
492    /// when they see it.
493    #[serde(default, skip_serializing_if = "Option::is_none")]
494    pub script_object: Option<String>,
495    /// humantime duration string (e.g. "30s", "10m"). Script-intrinsic
496    /// — represents how long this script reasonably takes to run.
497    pub timeout: String,
498    /// Token + session combination the agent uses to launch the
499    /// script (v0.21). Default = [`RunAs::System`] (Session 0,
500    /// LocalSystem privileges, no GUI) — matches pre-v0.21 behavior.
501    #[serde(default)]
502    pub run_as: RunAs,
503    /// Working directory for the spawned child (v0.21.1). When
504    /// unset, the child inherits the agent's cwd — on Windows that
505    /// means `%SystemRoot%\System32` for the prod service, which is
506    /// almost never what operators actually want. Use an absolute
507    /// path; relative paths are passed through to the OS verbatim.
508    /// `%PROGRAMDATA%` works for `run_as: system`; for `run_as: user`
509    /// you'd want `%USERPROFILE%` (but expansion happens in the
510    /// shell, so write `$env:USERPROFILE` for PowerShell, or set
511    /// it via teravars before `kanade job create`).
512    #[serde(default, skip_serializing_if = "Option::is_none")]
513    pub cwd: Option<String>,
514}
515
516impl Execute {
517    /// Treat an empty `script:` body as "intentionally unset". Operators
518    /// commenting out a block-scalar tend to leave the key behind, and
519    /// failing the validator on `script: ""` would surprise them.
520    fn has_inline_script(&self) -> bool {
521        matches!(&self.script, Some(s) if !s.is_empty())
522    }
523
524    /// Enforce that exactly one of `script` / `script_file` /
525    /// `script_object` is set. Called at the write-side parse
526    /// boundaries (CLI `kanade job create` + backend
527    /// `POST /api/jobs`) so ambiguous YAML is rejected before it
528    /// reaches the JOBS KV. Read paths (projector, agent
529    /// scheduler, list endpoints) skip this check — they only ever
530    /// see what the write path already validated.
531    pub fn validate_script_source(&self) -> Result<(), String> {
532        let inline = self.has_inline_script();
533        let file = self.script_file.is_some();
534        let obj = self.script_object.is_some();
535        let set = [inline, file, obj].into_iter().filter(|b| *b).count();
536        match set {
537            1 => Ok(()),
538            0 => Err("execute: one of `script`, `script_file`, `script_object` must be set".into()),
539            _ => Err(format!(
540                "execute: only one of `script` / `script_file` / `script_object` may be set \
541                 (got script={inline}, script_file={file}, script_object={obj})"
542            )),
543        }
544    }
545}
546
547impl Manifest {
548    /// Cross-field semantic checks that don't fit into pure serde
549    /// derive. Currently delegates to
550    /// [`Execute::validate_script_source`] — see that method's
551    /// docs for the rationale on which call sites should run this.
552    pub fn validate(&self) -> Result<(), String> {
553        self.execute.validate_script_source()?;
554        // Stdout-format compatibility. `inventory:` and `check:` both
555        // consume the SAME single JSON object — they COMPOSE: a check
556        // can extract `status`/`detail` for the Health tab while the
557        // projector explodes the rest into SPA sub-tables. `emit:` is
558        // different — its stdout is NDJSON and the agent omits it from
559        // the result entirely — so it can't be paired with either.
560        if self.emit.is_some() && (self.inventory.is_some() || self.check.is_some()) {
561            return Err(
562                "`emit:` is incompatible with `inventory:` / `check:` — emit's stdout is NDJSON \
563                 timeline events (and omitted from the result), while inventory/check read a \
564                 single JSON object from stdout"
565                    .to_string(),
566            );
567        }
568        // A check's `name` is the Health-tab row id (React key); the
569        // field names tell the agent where to read status/detail.
570        // An empty value is an invisible runtime bug, and the serde
571        // defaults don't guard an operator who writes `status_field:
572        // ""` explicitly — reject all three here.
573        if let Some(check) = &self.check {
574            for (label, value) in [
575                ("check.name", &check.name),
576                ("check.status_field", &check.status_field),
577                ("check.detail_field", &check.detail_field),
578            ] {
579                if value.trim().is_empty() {
580                    return Err(format!("{label} must not be empty"));
581                }
582            }
583            // A present-but-blank `troubleshoot` is a broken
584            // remediation job id (the "修復する" button would target
585            // an empty manifest id) — reject it too.
586            if let Some(troubleshoot) = &check.troubleshoot {
587                if troubleshoot.trim().is_empty() {
588                    return Err("check.troubleshoot must not be empty when set".to_string());
589                }
590            }
591        }
592        // #291: a `client:` job is rendered in the Client App's
593        // catalog (`jobs.list` → `jobs.execute`). serde already makes
594        // `name` + `category` required at parse time; the only gap is
595        // a present-but-blank `name`, which would render an empty row
596        // title — reject it like the other display-id fields.
597        if let Some(client) = &self.client {
598            if client.name.trim().is_empty() {
599                return Err("client.name must not be empty".to_string());
600            }
601            // Optional display fields, when present, must be
602            // meaningful: a blank `description` renders an empty
603            // subtitle and a blank `icon` is a dangling lucide name.
604            // Same present-but-blank guard the `check:` block applies
605            // to its optional `troubleshoot` id.
606            for (label, value) in [
607                ("client.description", &client.description),
608                ("client.icon", &client.icon),
609            ] {
610                if let Some(v) = value {
611                    if v.trim().is_empty() {
612                        return Err(format!("{label} must not be empty when set"));
613                    }
614                }
615            }
616        }
617        Ok(())
618    }
619}
620
621#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Copy, PartialEq, Eq)]
622#[serde(rename_all = "lowercase")]
623pub enum ExecuteShell {
624    Powershell,
625    Cmd,
626}
627
628impl From<ExecuteShell> for Shell {
629    fn from(s: ExecuteShell) -> Self {
630        match s {
631            ExecuteShell::Powershell => Shell::Powershell,
632            ExecuteShell::Cmd => Shell::Cmd,
633        }
634    }
635}
636
637#[cfg(test)]
638mod tests {
639    use super::*;
640
641    /// The example check-job + schedule YAMLs shipped under `configs/`
642    /// must stay valid as the schema evolves (#290 PR-C). `include_str!`
643    /// pins them at compile time so a breaking edit fails `cargo test`
644    /// rather than only `kanade job create` at deploy time.
645    #[test]
646    fn example_check_job_yamls_parse_and_validate() {
647        let jobs = [
648            (
649                "check-bitlocker",
650                include_str!("../../../configs/jobs/check-bitlocker.yaml"),
651            ),
652            (
653                "check-av-signature",
654                include_str!("../../../configs/jobs/check-av-signature.yaml"),
655            ),
656            (
657                "check-cert-expiry",
658                include_str!("../../../configs/jobs/check-cert-expiry.yaml"),
659            ),
660        ];
661        for (name, yaml) in jobs {
662            let m: Manifest =
663                serde_yaml::from_str(yaml).unwrap_or_else(|e| panic!("{name} parse: {e}"));
664            m.validate()
665                .unwrap_or_else(|e| panic!("{name} validate: {e}"));
666            let check = m
667                .check
668                .as_ref()
669                .unwrap_or_else(|| panic!("{name} must carry a check: hint"));
670            assert!(!check.name.trim().is_empty(), "{name} check.name empty");
671            // These three examples all read admin-only WMI namespaces,
672            // so they run_as system. NOTE: that's a property of these
673            // particular checks, NOT of the `check:` contract — a check
674            // probing user-session state could legitimately run_as user.
675            assert_eq!(
676                m.execute.run_as,
677                RunAs::System,
678                "{name} should run_as system"
679            );
680        }
681    }
682
683    /// The example user-invokable job YAMLs (#291) shipped under
684    /// `configs/jobs/` must stay valid as the `client:` schema
685    /// evolves. `include_str!` pins them at compile time so a breaking
686    /// edit fails `cargo test`, not `kanade job create` at deploy.
687    #[test]
688    fn example_client_job_yamls_parse_and_validate() {
689        let jobs = [
690            (
691                "fix-teams-cache",
692                JobCategory::Troubleshoot,
693                include_str!("../../../configs/jobs/fix-teams-cache.yaml"),
694            ),
695            (
696                "chrome-update",
697                JobCategory::SoftwareUpdate,
698                include_str!("../../../configs/jobs/chrome-update.yaml"),
699            ),
700            (
701                "install-slack",
702                JobCategory::Catalog,
703                include_str!("../../../configs/jobs/install-slack.yaml"),
704            ),
705        ];
706        for (id, category, yaml) in jobs {
707            let m: Manifest =
708                serde_yaml::from_str(yaml).unwrap_or_else(|e| panic!("{id} parse: {e}"));
709            m.validate()
710                .unwrap_or_else(|e| panic!("{id} validate: {e}"));
711            assert_eq!(m.id, id, "{id} id mismatch");
712            let client = m
713                .client
714                .as_ref()
715                .unwrap_or_else(|| panic!("{id} must carry a client: block"));
716            assert!(!client.name.trim().is_empty(), "{id} client.name empty");
717            assert_eq!(client.category, category, "{id} category");
718        }
719    }
720
721    #[test]
722    fn example_check_schedule_yamls_parse_and_validate() {
723        let schedules = [
724            (
725                "check-bitlocker",
726                include_str!("../../../configs/schedules/check-bitlocker.yaml"),
727            ),
728            (
729                "check-av-signature",
730                include_str!("../../../configs/schedules/check-av-signature.yaml"),
731            ),
732            (
733                "check-cert-expiry",
734                include_str!("../../../configs/schedules/check-cert-expiry.yaml"),
735            ),
736        ];
737        for (name, yaml) in schedules {
738            let s: Schedule =
739                serde_yaml::from_str(yaml).unwrap_or_else(|e| panic!("{name} schedule parse: {e}"));
740            s.validate()
741                .unwrap_or_else(|e| panic!("{name} schedule validate: {e}"));
742            assert_eq!(s.job_id, name, "{name} schedule must reference its job");
743        }
744    }
745
746    #[test]
747    fn target_is_specified_requires_at_least_one_field() {
748        let empty = Target::default();
749        assert!(!empty.is_specified());
750
751        let with_all = Target {
752            all: true,
753            ..Target::default()
754        };
755        assert!(with_all.is_specified());
756
757        let with_groups = Target {
758            groups: vec!["canary".into()],
759            ..Target::default()
760        };
761        assert!(with_groups.is_specified());
762
763        let with_pcs = Target {
764            pcs: vec!["pc-01".into()],
765            ..Target::default()
766        };
767        assert!(with_pcs.is_specified());
768    }
769
770    #[test]
771    fn manifest_deserialises_minimal_yaml() {
772        // Matches jobs/echo-test.yaml. v0.18: no target/rollout/jitter
773        // — those live on the schedule / exec request now.
774        let yaml = r#"
775id: echo-test
776version: 0.0.1
777execute:
778  shell: powershell
779  script: "echo 'kanade'"
780  timeout: 30s
781"#;
782        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
783        assert_eq!(m.id, "echo-test");
784        assert_eq!(m.version, "0.0.1");
785        assert!(matches!(m.execute.shell, ExecuteShell::Powershell));
786        assert_eq!(
787            m.execute.script.as_deref().map(str::trim),
788            Some("echo 'kanade'")
789        );
790        assert!(m.execute.script_file.is_none());
791        assert!(m.execute.script_object.is_none());
792        assert_eq!(m.execute.timeout, "30s");
793        assert!(!m.require_approval);
794        m.validate()
795            .expect("inline-script manifest passes validation");
796    }
797
798    #[test]
799    fn manifest_parses_check_job_and_validates() {
800        // An operator-defined health check (#290): a `check:` hint +
801        // a PowerShell script that prints {status, detail}.
802        let yaml = r#"
803id: check-bitlocker
804version: 0.1.0
805execute:
806  shell: powershell
807  run_as: system
808  timeout: 15s
809  script: |
810    [pscustomobject]@{ status = 'ok'; detail = 'all volumes protected' } | ConvertTo-Json -Compress
811check:
812  name: bitlocker
813  troubleshoot: fix-bitlocker
814"#;
815        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
816        let check = m.check.as_ref().expect("check hint present");
817        assert_eq!(check.name, "bitlocker");
818        assert_eq!(check.troubleshoot.as_deref(), Some("fix-bitlocker"));
819        // Field names default to the conventional "status" / "detail".
820        assert_eq!(check.status_field, "status");
821        assert_eq!(check.detail_field, "detail");
822        assert!(m.inventory.is_none() && m.emit.is_none());
823        m.validate().expect("check-only manifest passes validation");
824    }
825
826    #[test]
827    fn manifest_check_defaults_and_custom_fields() {
828        // Minimal: only `name`; status/detail fields default.
829        let m: Manifest = serde_yaml::from_str(
830            r#"
831id: check-disk
832version: 0.1.0
833execute:
834  shell: powershell
835  script: "[pscustomobject]@{ status = 'ok' } | ConvertTo-Json -Compress"
836  timeout: 10s
837check:
838  name: disk_free
839"#,
840        )
841        .expect("parse");
842        let c = m.check.as_ref().unwrap();
843        assert_eq!(c.name, "disk_free");
844        assert_eq!(c.status_field, "status");
845        assert_eq!(c.detail_field, "detail");
846        assert!(c.troubleshoot.is_none());
847        m.validate().expect("validates");
848
849        // The operator can point status/detail at any field of their
850        // free-form inventory object.
851        let m2: Manifest = serde_yaml::from_str(
852            r#"
853id: check-custom
854version: 0.1.0
855execute:
856  shell: powershell
857  script: "echo x"
858  timeout: 10s
859check:
860  name: patch_level
861  status_field: compliance
862  detail_field: summary
863"#,
864        )
865        .expect("parse");
866        let c2 = m2.check.as_ref().unwrap();
867        assert_eq!(c2.status_field, "compliance");
868        assert_eq!(c2.detail_field, "summary");
869    }
870
871    #[test]
872    fn manifest_allows_check_composed_with_inventory() {
873        // `check:` + `inventory:` COMPOSE on the same stdout object:
874        // status/detail → Health tab, the rest → SPA projection +
875        // explode sub-tables. Must pass validation.
876        let yaml = r#"
877id: check-bitlocker-detailed
878version: 0.1.0
879execute:
880  shell: powershell
881  script: "echo x"
882  timeout: 10s
883check:
884  name: bitlocker
885inventory:
886  display:
887    - { field: status, label: Status }
888"#;
889        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
890        assert!(m.check.is_some() && m.inventory.is_some());
891        m.validate().expect("check + inventory compose");
892    }
893
894    #[test]
895    fn manifest_rejects_check_combined_with_emit() {
896        // `emit:` stdout is NDJSON (and omitted from the result), so
897        // it can't pair with `check:` (which needs a single JSON
898        // object on stdout).
899        let yaml = r#"
900id: bad-mix
901version: 0.1.0
902execute:
903  shell: powershell
904  script: "echo x"
905  timeout: 10s
906check:
907  name: bitlocker
908emit:
909  type: events
910"#;
911        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
912        let err = m.validate().expect_err("emit + check must fail");
913        assert!(err.contains("incompatible"), "err: {err}");
914    }
915
916    #[test]
917    fn manifest_rejects_emit_combined_with_inventory() {
918        // The other half of the emit-incompatibility condition.
919        let yaml = r#"
920id: bad-mix-2
921version: 0.1.0
922execute:
923  shell: powershell
924  script: "echo x"
925  timeout: 10s
926emit:
927  type: events
928inventory:
929  display:
930    - { field: status, label: Status }
931"#;
932        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
933        let err = m.validate().expect_err("emit + inventory must fail");
934        assert!(err.contains("incompatible"), "err: {err}");
935    }
936
937    #[test]
938    fn manifest_rejects_empty_check_field_names() {
939        // Empty name / status_field / detail_field are invisible
940        // runtime bugs (empty React key, agent reads the wrong field)
941        // — reject them even though serde supplies non-empty defaults.
942        let base = |inner: &str| {
943            format!(
944                "id: c\nversion: 0.1.0\nexecute:\n  shell: powershell\n  script: \"echo x\"\n  timeout: 10s\ncheck:\n{inner}"
945            )
946        };
947        for inner in [
948            "  name: \"\"\n",
949            "  name: ok\n  status_field: \"\"\n",
950            "  name: ok\n  detail_field: \"   \"\n",
951            // present-but-blank troubleshoot → broken remediation id.
952            "  name: ok\n  troubleshoot: \"  \"\n",
953        ] {
954            let m: Manifest = serde_yaml::from_str(&base(inner)).expect("parse");
955            let err = m.validate().expect_err("empty field must fail");
956            assert!(err.contains("must not be empty"), "err: {err}");
957        }
958    }
959
960    #[test]
961    fn manifest_client_absent_by_default() {
962        // A plain operator job (the overwhelming majority) carries no
963        // `client:` block, so it never surfaces in the end-user
964        // catalog.
965        let yaml = r#"
966id: echo-test
967version: 0.0.1
968execute:
969  shell: powershell
970  script: "echo 'kanade'"
971  timeout: 30s
972"#;
973        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
974        assert!(m.client.is_none());
975        m.validate().expect("operator-only job validates");
976    }
977
978    #[test]
979    fn manifest_client_parses_and_validates() {
980        // The Client App "困ったとき" remediation job shape: a
981        // user-invokable troubleshoot job with the end-user fields the
982        // KLP `jobs.list` wire needs, grouped under `client:`.
983        let yaml = r#"
984id: fix-teams-cache
985version: 1.0.0
986execute:
987  shell: powershell
988  script: "echo clearing"
989  timeout: 60s
990client:
991  name: "Teams のキャッシュをクリア"
992  description: "Teams が重いときに試してください"
993  category: troubleshoot
994  icon: brush-cleaning
995"#;
996        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
997        let c = m.client.as_ref().expect("client block present");
998        assert_eq!(c.name, "Teams のキャッシュをクリア");
999        assert_eq!(
1000            c.description.as_deref(),
1001            Some("Teams が重いときに試してください")
1002        );
1003        assert_eq!(c.category, JobCategory::Troubleshoot);
1004        assert_eq!(c.icon.as_deref(), Some("brush-cleaning"));
1005        m.validate().expect("user-invokable job validates");
1006    }
1007
1008    #[test]
1009    fn manifest_client_minimal_only_name_and_category() {
1010        // description + icon are optional; name + category are the
1011        // serde-required minimum.
1012        let yaml = r#"
1013id: install-slack
1014version: 1.0.0
1015execute:
1016  shell: powershell
1017  script: "echo install"
1018  timeout: 600s
1019client:
1020  name: Slack
1021  category: catalog
1022"#;
1023        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
1024        let c = m.client.as_ref().expect("client present");
1025        assert_eq!(c.category, JobCategory::Catalog);
1026        assert!(c.description.is_none() && c.icon.is_none());
1027        m.validate().expect("minimal client validates");
1028    }
1029
1030    #[test]
1031    fn manifest_client_rejects_blank_name() {
1032        // serde guarantees `name`/`category` are present; the one gap
1033        // is a present-but-blank name → empty catalog row title.
1034        let yaml = r#"
1035id: j
1036version: 1.0.0
1037execute:
1038  shell: powershell
1039  script: "echo x"
1040  timeout: 30s
1041client:
1042  name: "   "
1043  category: catalog
1044"#;
1045        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
1046        let err = m.validate().expect_err("blank name must fail");
1047        assert!(err.contains("client.name"), "err: {err}");
1048    }
1049
1050    #[test]
1051    fn manifest_client_rejects_blank_optional_fields() {
1052        // description / icon are optional, but a present-but-blank
1053        // value is a bug (empty subtitle / dangling icon name) — reject
1054        // it, mirroring the check: block's troubleshoot guard.
1055        for (field, line) in [
1056            ("client.description", "  description: \"  \"\n"),
1057            ("client.icon", "  icon: \"\"\n"),
1058        ] {
1059            let yaml = format!(
1060                "id: j\nversion: 1.0.0\nexecute:\n  shell: powershell\n  script: \"echo x\"\n  timeout: 30s\nclient:\n  name: A\n  category: catalog\n{line}"
1061            );
1062            let m: Manifest = serde_yaml::from_str(&yaml).expect("parse");
1063            let err = m.validate().expect_err("blank optional field must fail");
1064            assert!(err.contains(field), "expected {field} in err: {err}");
1065        }
1066    }
1067
1068    #[test]
1069    fn manifest_client_requires_category_at_parse() {
1070        // A `client:` block missing `category` is a hard parse error
1071        // (serde required field) — no manual validate() needed.
1072        let yaml = r#"
1073id: j
1074version: 1.0.0
1075execute:
1076  shell: powershell
1077  script: "echo x"
1078  timeout: 30s
1079client:
1080  name: "A job"
1081"#;
1082        let r: Result<Manifest, _> = serde_yaml::from_str(yaml);
1083        assert!(
1084            r.is_err(),
1085            "missing category must be a parse error, got {r:?}"
1086        );
1087    }
1088
1089    #[test]
1090    fn manifest_client_rejects_unknown_field() {
1091        // `deny_unknown_fields` on ClientHint catches a fat-fingered
1092        // `displayname:` instead of silently dropping it.
1093        let yaml = r#"
1094id: j
1095version: 1.0.0
1096execute:
1097  shell: powershell
1098  script: "echo x"
1099  timeout: 30s
1100client:
1101  name: "A job"
1102  category: catalog
1103  displayname: oops
1104"#;
1105        let r: Result<Manifest, _> = serde_yaml::from_str(yaml);
1106        assert!(
1107            r.is_err(),
1108            "unknown client field must be a parse error, got {r:?}"
1109        );
1110    }
1111
1112    fn execute_with(
1113        script: Option<&str>,
1114        script_file: Option<&str>,
1115        script_object: Option<&str>,
1116    ) -> Execute {
1117        Execute {
1118            shell: ExecuteShell::Powershell,
1119            script: script.map(str::to_owned),
1120            script_file: script_file.map(str::to_owned),
1121            script_object: script_object.map(str::to_owned),
1122            timeout: "30s".into(),
1123            run_as: RunAs::default(),
1124            cwd: None,
1125        }
1126    }
1127
1128    #[test]
1129    fn validate_accepts_inline_script() {
1130        let e = execute_with(Some("echo hi"), None, None);
1131        assert!(e.validate_script_source().is_ok());
1132    }
1133
1134    #[test]
1135    fn validate_accepts_script_file_alone() {
1136        let e = execute_with(None, Some("scripts/cleanup.ps1"), None);
1137        assert!(e.validate_script_source().is_ok());
1138    }
1139
1140    #[test]
1141    fn validate_accepts_script_object_alone() {
1142        let e = execute_with(None, None, Some("cleanup/1.0.0"));
1143        assert!(e.validate_script_source().is_ok());
1144    }
1145
1146    #[test]
1147    fn validate_treats_empty_inline_script_as_unset() {
1148        // `script: ""` + `script_object` set is the natural shape
1149        // when an operator comments out the YAML block-scalar body
1150        // but leaves the key. Should pass.
1151        let e = execute_with(Some(""), None, Some("cleanup/1.0.0"));
1152        assert!(e.validate_script_source().is_ok());
1153    }
1154
1155    #[test]
1156    fn validate_rejects_zero_sources() {
1157        let e = execute_with(None, None, None);
1158        let err = e.validate_script_source().unwrap_err();
1159        assert!(err.contains("must be set"), "got: {err}");
1160    }
1161
1162    #[test]
1163    fn validate_rejects_empty_inline_only() {
1164        let e = execute_with(Some(""), None, None);
1165        let err = e.validate_script_source().unwrap_err();
1166        assert!(err.contains("must be set"), "got: {err}");
1167    }
1168
1169    #[test]
1170    fn validate_rejects_inline_plus_file() {
1171        let e = execute_with(Some("echo hi"), Some("scripts/cleanup.ps1"), None);
1172        let err = e.validate_script_source().unwrap_err();
1173        assert!(err.contains("only one of"), "got: {err}");
1174    }
1175
1176    #[test]
1177    fn validate_rejects_inline_plus_object() {
1178        let e = execute_with(Some("echo hi"), None, Some("cleanup/1.0.0"));
1179        let err = e.validate_script_source().unwrap_err();
1180        assert!(err.contains("only one of"), "got: {err}");
1181    }
1182
1183    #[test]
1184    fn validate_rejects_file_plus_object() {
1185        let e = execute_with(None, Some("scripts/cleanup.ps1"), Some("cleanup/1.0.0"));
1186        let err = e.validate_script_source().unwrap_err();
1187        assert!(err.contains("only one of"), "got: {err}");
1188    }
1189
1190    #[test]
1191    fn validate_rejects_all_three() {
1192        let e = execute_with(
1193            Some("echo hi"),
1194            Some("scripts/cleanup.ps1"),
1195            Some("cleanup/1.0.0"),
1196        );
1197        let err = e.validate_script_source().unwrap_err();
1198        assert!(err.contains("only one of"), "got: {err}");
1199    }
1200
1201    #[test]
1202    fn manifest_deserialises_script_object_yaml() {
1203        // SPEC §2.4.1 example shape with the Object Store
1204        // reference picked over inline.
1205        let yaml = r#"
1206id: cleanup-disk-temp
1207version: 1.0.1
1208execute:
1209  shell: powershell
1210  script_object: cleanup-disk-temp/1.0.1
1211  timeout: 600s
1212"#;
1213        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
1214        assert_eq!(
1215            m.execute.script_object.as_deref(),
1216            Some("cleanup-disk-temp/1.0.1")
1217        );
1218        assert!(m.execute.script.is_none());
1219        m.validate()
1220            .expect("script_object-only manifest passes validation");
1221    }
1222
1223    #[test]
1224    fn manifest_rejects_typo_in_script_field_name() {
1225        // `deny_unknown_fields` on Execute catches `script_objectt`
1226        // and similar fat-fingers at parse time instead of letting
1227        // them silently fall through to "all three unset".
1228        let yaml = r#"
1229id: typo
1230version: 1.0.0
1231execute:
1232  shell: powershell
1233  script_objectt: oops
1234  timeout: 30s
1235"#;
1236        let r: Result<Manifest, _> = serde_yaml::from_str(yaml);
1237        assert!(r.is_err(), "expected parse error, got {r:?}");
1238    }
1239
1240    #[test]
1241    fn schedule_carries_target_and_rollout() {
1242        let yaml = r#"
1243id: hourly-cleanup-canary
1244when:
1245  per_pc: { every: 1h }
1246job_id: cleanup
1247enabled: true
1248target:
1249  groups: [canary, wave1]
1250jitter: 30s
1251rollout:
1252  strategy: wave
1253  waves:
1254    - { group: canary, delay: 0s }
1255    - { group: wave1,  delay: 5s }
1256"#;
1257        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
1258        assert_eq!(s.id, "hourly-cleanup-canary");
1259        assert_eq!(s.job_id, "cleanup");
1260        assert_eq!(s.plan.target.groups, vec!["canary", "wave1"]);
1261        assert_eq!(s.plan.jitter.as_deref(), Some("30s"));
1262        let rollout = s.plan.rollout.expect("rollout present");
1263        assert_eq!(rollout.waves.len(), 2);
1264        assert_eq!(rollout.waves[0].group, "canary");
1265        assert_eq!(rollout.waves[1].delay, "5s");
1266        assert_eq!(rollout.strategy, RolloutStrategy::Wave);
1267    }
1268
1269    #[test]
1270    fn schedule_minimal_target_all() {
1271        let yaml = r#"
1272id: kitting
1273when:
1274  per_pc: once
1275enabled: true
1276job_id: scheduled-echo
1277target: { all: true }
1278"#;
1279        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
1280        assert_eq!(s.id, "kitting");
1281        assert_eq!(s.when, When::PerPc(PerPolicy::Once(OnceLiteral::Once)));
1282        assert!(s.enabled);
1283        assert_eq!(s.job_id, "scheduled-echo");
1284        assert!(s.plan.target.all);
1285        assert!(s.plan.rollout.is_none());
1286        assert!(s.plan.jitter.is_none());
1287        assert!(s.active.is_empty());
1288    }
1289
1290    #[test]
1291    fn schedule_enabled_defaults_to_true() {
1292        let yaml = r#"
1293id: x
1294when:
1295  per_pc: once
1296job_id: y
1297target: { all: true }
1298"#;
1299        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
1300        assert!(s.enabled);
1301    }
1302
1303    // ---- `when` parsing (#418 Phase 1) ----
1304
1305    fn schedule_yaml_with(when_block: &str) -> String {
1306        format!(
1307            r#"
1308id: x
1309when:
1310{when_block}
1311job_id: y
1312target: {{ all: true }}
1313"#
1314        )
1315    }
1316
1317    #[test]
1318    fn when_per_pc_every_parses_unquoted_humantime() {
1319        // `6h` is digit-led but non-numeric → YAML string, same as
1320        // the old `cooldown: 6h` convention. No quotes needed.
1321        let s: Schedule =
1322            serde_yaml::from_str(&schedule_yaml_with("  per_pc: { every: 6h }")).expect("parse");
1323        assert_eq!(
1324            s.when,
1325            When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() }))
1326        );
1327    }
1328
1329    #[test]
1330    fn when_per_target_every_parses() {
1331        let s: Schedule = serde_yaml::from_str(&schedule_yaml_with("  per_target: { every: 24h }"))
1332            .expect("parse");
1333        assert_eq!(
1334            s.when,
1335            When::PerTarget(PerPolicy::Every(EverySpec {
1336                every: "24h".into()
1337            }))
1338        );
1339    }
1340
1341    #[test]
1342    fn when_per_target_once_parses() {
1343        // Falls out of the shared PerPolicy shape and decide_fire
1344        // already implements it ("any one pc succeeds → skip the
1345        // target forever"), so it is allowed, not rejected.
1346        let s: Schedule =
1347            serde_yaml::from_str(&schedule_yaml_with("  per_target: once")).expect("parse");
1348        assert_eq!(s.when, When::PerTarget(PerPolicy::Once(OnceLiteral::Once)));
1349    }
1350
1351    #[test]
1352    fn when_calendar_time_parses() {
1353        let s: Schedule = serde_yaml::from_str(&schedule_yaml_with(
1354            "  calendar:\n    at: \"09:00\"\n    days: [mon-fri]",
1355        ))
1356        .expect("parse");
1357        match &s.when {
1358            When::Calendar(c) => {
1359                assert_eq!(c.at, "09:00");
1360                assert_eq!(c.days, vec!["mon-fri"]);
1361            }
1362            other => panic!("expected calendar, got {other:?}"),
1363        }
1364    }
1365
1366    #[test]
1367    fn when_calendar_days_default_empty() {
1368        let s: Schedule =
1369            serde_yaml::from_str(&schedule_yaml_with("  calendar:\n    at: \"09:00\""))
1370                .expect("parse");
1371        match &s.when {
1372            When::Calendar(c) => assert!(c.days.is_empty(), "days defaults to empty (= daily)"),
1373            other => panic!("expected calendar, got {other:?}"),
1374        }
1375    }
1376
1377    #[test]
1378    fn when_calendar_datetime_parses_all_separators() {
1379        // one-shot: date+time in hyphen / ISO-T / slash forms
1380        for at in ["2026-06-10 09:00", "2026-06-10T09:00", "2026/06/10 09:00"] {
1381            let block = format!("  calendar:\n    at: \"{at}\"");
1382            let s: Schedule = serde_yaml::from_str(&schedule_yaml_with(&block))
1383                .unwrap_or_else(|e| panic!("parse '{at}': {e}"));
1384            match &s.when {
1385                When::Calendar(c) => {
1386                    use chrono::Datelike;
1387                    let p = c.parse_at().expect("parse_at");
1388                    let d = p.date.expect("datetime at carries a date");
1389                    assert_eq!((d.year(), d.month(), d.day()), (2026, 6, 10), "for '{at}'");
1390                }
1391                other => panic!("expected calendar, got {other:?}"),
1392            }
1393        }
1394    }
1395
1396    #[test]
1397    fn when_rejects_bad_once_keyword() {
1398        // `onec` must be a parse error, not a silently-absorbed
1399        // string (OnceLiteral is a single-variant enum for exactly
1400        // this reason).
1401        let r: Result<Schedule, _> = serde_yaml::from_str(&schedule_yaml_with("  per_pc: onec"));
1402        assert!(r.is_err(), "expected parse error, got {r:?}");
1403    }
1404
1405    #[test]
1406    fn when_rejects_unknown_key_in_every() {
1407        // EverySpec is deny_unknown_fields so `evry:` typos fail
1408        // even under the untagged PerPolicy.
1409        let r: Result<Schedule, _> =
1410            serde_yaml::from_str(&schedule_yaml_with("  per_pc: { evry: 6h }"));
1411        assert!(r.is_err(), "expected parse error, got {r:?}");
1412    }
1413
1414    #[test]
1415    fn when_rejects_unknown_variant() {
1416        let r: Result<Schedule, _> =
1417            serde_yaml::from_str(&schedule_yaml_with("  per_galaxy: once"));
1418        assert!(r.is_err(), "expected parse error, got {r:?}");
1419    }
1420
1421    #[test]
1422    fn when_rejects_old_top_level_cron_field() {
1423        // Pre-#418 shape: top-level `cron:` + no `when:`. Must fail
1424        // loudly (missing `when`), which is what turns stale KV
1425        // blobs into warn-skips after the upgrade.
1426        let yaml = r#"
1427id: x
1428cron: "* * * * * *"
1429job_id: y
1430target: { all: true }
1431"#;
1432        let r: Result<Schedule, _> = serde_yaml::from_str(yaml);
1433        assert!(r.is_err(), "expected parse error, got {r:?}");
1434    }
1435
1436    #[test]
1437    fn when_rejects_retired_cron_escape_hatch() {
1438        // #418 Phase 2 retired `when: { cron: "..." }`. A raw cron
1439        // is now an unknown variant → parse error (operators use the
1440        // calendar form instead).
1441        let r: Result<Schedule, _> =
1442            serde_yaml::from_str(&schedule_yaml_with("  cron: \"0 0 9 * * mon-fri\""));
1443        assert!(
1444            r.is_err(),
1445            "expected parse error for retired cron, got {r:?}"
1446        );
1447    }
1448
1449    #[test]
1450    fn when_round_trips_json_and_yaml() {
1451        // Round-trip through the full Schedule: that is the wire
1452        // unit for both stores (JSON catalog KV + YAML mirror), and
1453        // it exercises the singleton_map field attribute that keeps
1454        // serde_yaml on the map shape instead of `!per_pc` tags.
1455        for when in [
1456            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1457            When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() })),
1458            When::PerTarget(PerPolicy::Once(OnceLiteral::Once)),
1459            When::PerTarget(PerPolicy::Every(EverySpec {
1460                every: "24h".into(),
1461            })),
1462            calendar("09:00", &["mon-fri"]),
1463            calendar("2026-06-10 09:00", &[]),
1464        ] {
1465            let s = schedule_with(when.clone(), RunsOn::Backend);
1466
1467            let json = serde_json::to_string(&s).expect("json serialise");
1468            let back: Schedule = serde_json::from_str(&json).expect("json deserialise");
1469            assert_eq!(back.when, when, "json round-trip for {when}");
1470
1471            let yaml = serde_yaml::to_string(&s).expect("yaml serialise");
1472            assert!(
1473                !yaml.contains('!'),
1474                "yaml must use the map shape, not tags: {yaml}"
1475            );
1476            let back: Schedule = serde_yaml::from_str(&yaml).expect("yaml deserialise");
1477            assert_eq!(back.when, when, "yaml round-trip for {when}");
1478        }
1479    }
1480
1481    #[test]
1482    fn when_once_serialises_as_bare_keyword() {
1483        // The wire shape operators see in the YAML mirror must stay
1484        // the ergonomic `per_pc: once`, not a one-variant map.
1485        let json = serde_json::to_value(When::PerPc(PerPolicy::Once(OnceLiteral::Once)))
1486            .expect("serialise");
1487        assert_eq!(json, serde_json::json!({ "per_pc": "once" }));
1488    }
1489
1490    #[test]
1491    fn when_displays_operator_summary() {
1492        for (when, expected) in [
1493            (
1494                When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1495                "per_pc once",
1496            ),
1497            (
1498                When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() })),
1499                "per_pc every 6h",
1500            ),
1501            (
1502                When::PerTarget(PerPolicy::Every(EverySpec {
1503                    every: "24h".into(),
1504                })),
1505                "per_target every 24h",
1506            ),
1507            (calendar("09:00", &["mon-fri"]), "at 09:00 [mon-fri]"),
1508            (calendar("2026-06-10 09:00", &[]), "at 2026-06-10 09:00"),
1509        ] {
1510            assert_eq!(when.to_string(), expected);
1511        }
1512    }
1513
1514    // ---- lowering (#418: when → engine vocabulary) ----
1515
1516    fn schedule_with(when: When, runs_on: RunsOn) -> Schedule {
1517        Schedule {
1518            id: "x".into(),
1519            when,
1520            job_id: "y".into(),
1521            plan: FanoutPlan::default(),
1522            active: Active::default(),
1523            constraints: Constraints::default(),
1524            on_failure: OnFailure::default(),
1525            tz: ScheduleTz::default(),
1526            starting_deadline: None,
1527            runs_on,
1528            enabled: true,
1529        }
1530    }
1531
1532    fn calendar(at: &str, days: &[&str]) -> When {
1533        When::Calendar(CalendarSpec {
1534            at: at.into(),
1535            days: days.iter().map(|d| (*d).to_string()).collect(),
1536        })
1537    }
1538
1539    #[test]
1540    fn lowering_matches_the_418_table() {
1541        let cases = [
1542            (
1543                When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1544                (POLL_CRON, ExecMode::OncePerPc, None),
1545            ),
1546            (
1547                When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() })),
1548                (POLL_CRON, ExecMode::OncePerPc, Some("6h")),
1549            ),
1550            (
1551                When::PerTarget(PerPolicy::Once(OnceLiteral::Once)),
1552                (POLL_CRON, ExecMode::OncePerTarget, None),
1553            ),
1554            (
1555                When::PerTarget(PerPolicy::Every(EverySpec {
1556                    every: "24h".into(),
1557                })),
1558                (POLL_CRON, ExecMode::OncePerTarget, Some("24h")),
1559            ),
1560            // calendar repeating → 6-field cron
1561            (
1562                calendar("09:00", &["mon-fri"]),
1563                ("0 0 9 * * mon-fri", ExecMode::EveryTick, None),
1564            ),
1565            // calendar daily (no days) → DOW *
1566            (
1567                calendar("18:30", &[]),
1568                ("0 30 18 * * *", ExecMode::EveryTick, None),
1569            ),
1570            // calendar one-shot → 7-field year cron
1571            (
1572                calendar("2026-06-10 09:00", &[]),
1573                ("0 0 9 10 6 * 2026", ExecMode::EveryTick, None),
1574            ),
1575        ];
1576        for (when, (cron, mode, cooldown)) in cases {
1577            let l = schedule_with(when.clone(), RunsOn::Backend).lowered();
1578            assert_eq!(l.cron, cron, "cron for {when}");
1579            assert_eq!(l.mode, mode, "mode for {when}");
1580            assert_eq!(l.cooldown.as_deref(), cooldown, "cooldown for {when}");
1581        }
1582    }
1583
1584    #[test]
1585    fn lowered_carries_schedule_tz() {
1586        for (tz, want) in [
1587            (ScheduleTz::Local, ScheduleTz::Local),
1588            (ScheduleTz::Utc, ScheduleTz::Utc),
1589        ] {
1590            let mut s = schedule_with(calendar("09:00", &["mon-fri"]), RunsOn::Backend);
1591            s.tz = tz;
1592            assert_eq!(s.lowered().tz, want, "calendar carries tz");
1593            // reconcile shapes carry tz too (for the active-window check)
1594            let mut s = schedule_with(
1595                When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1596                RunsOn::Backend,
1597            );
1598            s.tz = tz;
1599            assert_eq!(s.lowered().tz, want, "reconcile carries tz");
1600        }
1601    }
1602
1603    #[test]
1604    fn poll_cron_is_accepted_by_the_engine_parser() {
1605        // POLL_CRON is system-generated — if the engine's parser
1606        // ever rejected it every reconcile schedule would die at
1607        // register time. Validate it with the same croner config
1608        // (Seconds::Required, dom_and_dow, year optional).
1609        croner::parser::CronParser::builder()
1610            .seconds(croner::parser::Seconds::Required)
1611            .dom_and_dow(true)
1612            .build()
1613            .parse(POLL_CRON)
1614            .expect("POLL_CRON must parse");
1615    }
1616
1617    // ---- Schedule::validate() (#418 decision F) ----
1618
1619    #[test]
1620    fn validate_accepts_reconcile_shapes() {
1621        for when in [
1622            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1623            When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() })),
1624            When::PerTarget(PerPolicy::Once(OnceLiteral::Once)),
1625            When::PerTarget(PerPolicy::Every(EverySpec {
1626                every: "24h".into(),
1627            })),
1628        ] {
1629            schedule_with(when.clone(), RunsOn::Backend)
1630                .validate()
1631                .unwrap_or_else(|e| panic!("{when} should validate: {e}"));
1632        }
1633    }
1634
1635    #[test]
1636    fn validate_accepts_per_pc_on_agent() {
1637        schedule_with(
1638            When::PerPc(PerPolicy::Every(EverySpec { every: "1h".into() })),
1639            RunsOn::Agent,
1640        )
1641        .validate()
1642        .expect("per_pc + agent is the offline-inventory shape");
1643    }
1644
1645    #[test]
1646    fn validate_rejects_per_target_on_agent() {
1647        let err = schedule_with(
1648            When::PerTarget(PerPolicy::Every(EverySpec {
1649                every: "24h".into(),
1650            })),
1651            RunsOn::Agent,
1652        )
1653        .validate()
1654        .unwrap_err();
1655        assert!(err.contains("per_target"), "got: {err}");
1656        assert!(err.contains("runs_on: agent"), "got: {err}");
1657
1658        // per_target: once is also backend-only.
1659        let err = schedule_with(
1660            When::PerTarget(PerPolicy::Once(OnceLiteral::Once)),
1661            RunsOn::Agent,
1662        )
1663        .validate()
1664        .unwrap_err();
1665        assert!(err.contains("per_target"), "got (once): {err}");
1666        assert!(err.contains("runs_on: agent"), "got (once): {err}");
1667    }
1668
1669    #[test]
1670    fn validate_rejects_bad_every_duration() {
1671        let err = schedule_with(
1672            When::PerPc(PerPolicy::Every(EverySpec { every: "6x".into() })),
1673            RunsOn::Backend,
1674        )
1675        .validate()
1676        .unwrap_err();
1677        assert!(err.contains("when.every"), "got: {err}");
1678    }
1679
1680    #[test]
1681    fn validate_rejects_bad_jitter_and_starting_deadline() {
1682        let mut s = schedule_with(
1683            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1684            RunsOn::Backend,
1685        );
1686        s.plan.jitter = Some("5x".into());
1687        let err = s.validate().unwrap_err();
1688        assert!(err.contains("jitter"), "got: {err}");
1689
1690        let mut s = schedule_with(
1691            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1692            RunsOn::Backend,
1693        );
1694        s.starting_deadline = Some("soon".into());
1695        let err = s.validate().unwrap_err();
1696        assert!(err.contains("starting_deadline"), "got: {err}");
1697    }
1698
1699    #[test]
1700    fn validate_accepts_calendar_shapes() {
1701        for when in [
1702            calendar("09:00", &["mon-fri"]),   // weekday morning
1703            calendar("00:00", &["sun"]),       // weekly
1704            calendar("18:30", &[]),            // daily
1705            calendar("2026-06-10 09:00", &[]), // one-shot
1706            calendar("2026/12/25 00:00", &[]), // one-shot, slash form
1707        ] {
1708            schedule_with(when.clone(), RunsOn::Backend)
1709                .validate()
1710                .unwrap_or_else(|e| panic!("{when} should validate: {e}"));
1711        }
1712    }
1713
1714    #[test]
1715    fn validate_rejects_bad_at() {
1716        for bad in ["25:00", "09:60", "9", "noon", "2026-13-01 09:00"] {
1717            let err = schedule_with(calendar(bad, &[]), RunsOn::Backend)
1718                .validate()
1719                .unwrap_err();
1720            assert!(err.contains("when.at"), "for '{bad}', got: {err}");
1721        }
1722    }
1723
1724    #[test]
1725    fn validate_rejects_datetime_at_with_days() {
1726        // A dated `at` is a one-shot — pairing it with days is a
1727        // contradiction (the date already pins the day).
1728        let err = schedule_with(calendar("2026-06-10 09:00", &["mon"]), RunsOn::Backend)
1729            .validate()
1730            .unwrap_err();
1731        assert!(
1732            err.contains("one-shot") && err.contains("days"),
1733            "got: {err}"
1734        );
1735    }
1736
1737    #[test]
1738    fn validate_rejects_bad_day_name() {
1739        // A garbage DOW token is caught by the days pre-flight and
1740        // reported against `when.days`, not the confusing
1741        // "when.at lowered to invalid cron" (claude #432 review).
1742        let err = schedule_with(calendar("09:00", &["funday"]), RunsOn::Backend)
1743            .validate()
1744            .unwrap_err();
1745        assert!(err.contains("when.days"), "got: {err}");
1746        assert!(err.contains("funday"), "names the bad token: {err}");
1747        // a degenerate range like `mon-` reports the whole token, not
1748        // a cryptic empty part (claude #432 follow-up)
1749        let err = schedule_with(calendar("09:00", &["mon-"]), RunsOn::Backend)
1750            .validate()
1751            .unwrap_err();
1752        assert!(err.contains("'mon-'"), "names the whole token: {err}");
1753        // valid names / ranges / numeric / * all pass
1754        for ok in [
1755            calendar("09:00", &["mon-fri"]),
1756            calendar("09:00", &["mon", "wed", "sun"]),
1757            calendar("09:00", &["1-5"]),
1758        ] {
1759            schedule_with(ok.clone(), RunsOn::Backend)
1760                .validate()
1761                .unwrap_or_else(|e| panic!("{ok} should validate: {e}"));
1762        }
1763    }
1764
1765    #[test]
1766    fn calendar_oneshot_instant_detects_past() {
1767        use chrono::TimeZone;
1768        // a dated `at` resolves to an absolute instant…
1769        let c = CalendarSpec {
1770            at: "2024-01-01 09:00".into(),
1771            days: vec![],
1772        };
1773        let t = c
1774            .oneshot_instant(ScheduleTz::Utc)
1775            .expect("one-shot instant");
1776        assert_eq!(
1777            t,
1778            chrono::Utc.with_ymd_and_hms(2024, 1, 1, 9, 0, 0).unwrap()
1779        );
1780        assert!(t < chrono::Utc::now(), "2024 is in the past");
1781        // …while a repeating (time-only) calendar has no instant
1782        let rep = CalendarSpec {
1783            at: "09:00".into(),
1784            days: vec!["mon-fri".into()],
1785        };
1786        assert!(rep.oneshot_instant(ScheduleTz::Utc).is_none());
1787    }
1788
1789    fn schedule_with_active(from: Option<&str>, until: Option<&str>) -> Schedule {
1790        let mut s = schedule_with(
1791            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1792            RunsOn::Backend,
1793        );
1794        s.active = Active {
1795            from: from.map(str::to_owned),
1796            until: until.map(str::to_owned),
1797        };
1798        s
1799    }
1800
1801    #[test]
1802    fn validate_accepts_active_window() {
1803        schedule_with_active(Some("2026-07-01"), Some("2026-08-01T12:00:00+09:00"))
1804            .validate()
1805            .expect("date + rfc3339 bounds should validate");
1806    }
1807
1808    #[test]
1809    fn validate_rejects_unparseable_active_bound() {
1810        let err = schedule_with_active(Some("July 1st"), None)
1811            .validate()
1812            .unwrap_err();
1813        assert!(err.contains("active"), "got: {err}");
1814    }
1815
1816    #[test]
1817    fn validate_rejects_from_not_before_until() {
1818        let err = schedule_with_active(Some("2026-08-01"), Some("2026-07-01"))
1819            .validate()
1820            .unwrap_err();
1821        assert!(err.contains("strictly before"), "got: {err}");
1822
1823        let err = schedule_with_active(Some("2026-07-01"), Some("2026-07-01"))
1824            .validate()
1825            .unwrap_err();
1826        assert!(err.contains("strictly before"), "got: {err}");
1827    }
1828
1829    // ---- Active window semantics ----
1830
1831    #[test]
1832    fn active_window_is_half_open() {
1833        use chrono::TimeZone;
1834        let active = Active {
1835            from: Some("2026-07-01".into()),
1836            until: Some("2026-08-01".into()),
1837        };
1838        // UTC tz so the date bounds are UTC midnight.
1839        let at = |y, m, d, h| chrono::Utc.with_ymd_and_hms(y, m, d, h, 0, 0).unwrap();
1840        let c = |t| active.contains(t, ScheduleTz::Utc);
1841        assert!(!c(at(2026, 6, 30, 23)), "before from");
1842        assert!(c(at(2026, 7, 1, 0)), "at from (inclusive)");
1843        assert!(c(at(2026, 7, 15, 12)), "inside");
1844        assert!(!c(at(2026, 8, 1, 0)), "at until (exclusive)");
1845        assert!(!c(at(2026, 8, 2, 0)), "after until");
1846    }
1847
1848    #[test]
1849    fn active_empty_window_is_always_active() {
1850        assert!(Active::default().contains(chrono::Utc::now(), ScheduleTz::Local));
1851    }
1852
1853    #[test]
1854    fn active_rfc3339_bound_honours_offset_regardless_of_tz() {
1855        use chrono::TimeZone;
1856        let active = Active {
1857            from: Some("2026-07-01T09:00:00+09:00".into()),
1858            until: None,
1859        };
1860        // RFC3339 carries its own offset → tz arg is ignored.
1861        // 09:00 JST = 00:00 UTC.
1862        for tz in [ScheduleTz::Utc, ScheduleTz::Local] {
1863            assert!(
1864                !active.contains(
1865                    chrono::Utc
1866                        .with_ymd_and_hms(2026, 6, 30, 23, 59, 0)
1867                        .unwrap(),
1868                    tz
1869                )
1870            );
1871            assert!(active.contains(
1872                chrono::Utc.with_ymd_and_hms(2026, 7, 1, 0, 0, 0).unwrap(),
1873                tz
1874            ));
1875        }
1876    }
1877
1878    #[test]
1879    fn active_date_bound_respects_tz() {
1880        // A bare `YYYY-MM-DD` bound is midnight *in the schedule's
1881        // tz* (#418 Phase 2). The UTC interpretation is exact and
1882        // host-independent; assert that precisely.
1883        use chrono::TimeZone;
1884        let utc = Active::parse_bound("2026-07-01", ScheduleTz::Utc).expect("utc");
1885        assert_eq!(
1886            utc,
1887            chrono::Utc.with_ymd_and_hms(2026, 7, 1, 0, 0, 0).unwrap()
1888        );
1889
1890        // The local interpretation must equal what chrono::Local
1891        // computes for the same wall-clock midnight — proves the tz
1892        // path is wired to the host zone (the magnitude vs UTC is
1893        // host-dependent, so we compare against Local directly rather
1894        // than hard-coding the JST offset, keeping CI green on UTC
1895        // runners).
1896        let local = Active::parse_bound("2026-07-01", ScheduleTz::Local).expect("local");
1897        let want = chrono::Local
1898            .with_ymd_and_hms(2026, 7, 1, 0, 0, 0)
1899            .single()
1900            .expect("local midnight is unambiguous")
1901            .with_timezone(&chrono::Utc);
1902        assert_eq!(local, want, "date bound resolved in host-local tz");
1903    }
1904
1905    #[test]
1906    fn active_empty_is_skipped_when_serialising() {
1907        let s = schedule_with(
1908            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1909            RunsOn::Backend,
1910        );
1911        let json = serde_json::to_value(&s).expect("serialise");
1912        assert!(
1913            json.get("active").is_none(),
1914            "empty active must not appear on the wire: {json}"
1915        );
1916    }
1917
1918    // ---- constraints.window (#418 Phase 3) ----
1919
1920    fn with_window(win: &str) -> Schedule {
1921        let mut s = schedule_with(
1922            When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() })),
1923            RunsOn::Backend,
1924        );
1925        s.constraints.window = Some(win.into());
1926        s
1927    }
1928
1929    #[test]
1930    fn constraints_window_parses_and_round_trips() {
1931        let yaml = r#"
1932id: x
1933when:
1934  per_pc: { every: 6h }
1935job_id: y
1936target: { all: true }
1937constraints:
1938  window: "22:00-05:00"
1939"#;
1940        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
1941        assert_eq!(s.constraints.window.as_deref(), Some("22:00-05:00"));
1942        let back: Schedule =
1943            serde_json::from_str(&serde_json::to_string(&s).expect("ser")).expect("de");
1944        assert_eq!(back.constraints.window.as_deref(), Some("22:00-05:00"));
1945    }
1946
1947    #[test]
1948    fn constraints_empty_is_skipped_when_serialising() {
1949        let s = schedule_with(
1950            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
1951            RunsOn::Backend,
1952        );
1953        let json = serde_json::to_value(&s).expect("serialise");
1954        assert!(
1955            json.get("constraints").is_none(),
1956            "empty constraints must not appear on the wire: {json}"
1957        );
1958    }
1959
1960    #[test]
1961    fn window_no_constraint_always_allows() {
1962        let c = Constraints::default();
1963        assert!(c.allows(chrono::Utc::now(), ScheduleTz::Local));
1964    }
1965
1966    #[test]
1967    fn window_same_day_is_half_open() {
1968        use chrono::TimeZone;
1969        let s = with_window("09:00-17:00");
1970        let at = |h, m| chrono::Utc.with_ymd_and_hms(2026, 6, 9, h, m, 0).unwrap();
1971        let a = |t| s.constraints.allows(t, ScheduleTz::Utc);
1972        assert!(!a(at(8, 59)), "before start");
1973        assert!(a(at(9, 0)), "at start (inclusive)");
1974        assert!(a(at(16, 59)), "inside");
1975        assert!(!a(at(17, 0)), "at end (exclusive)");
1976        assert!(!a(at(23, 0)), "after end");
1977    }
1978
1979    #[test]
1980    fn window_crossing_midnight() {
1981        use chrono::TimeZone;
1982        let s = with_window("22:00-05:00");
1983        let at = |h, m| chrono::Utc.with_ymd_and_hms(2026, 6, 9, h, m, 0).unwrap();
1984        let a = |t| s.constraints.allows(t, ScheduleTz::Utc);
1985        assert!(a(at(22, 0)), "at start tonight");
1986        assert!(a(at(23, 30)), "late tonight");
1987        assert!(a(at(3, 0)), "early tomorrow");
1988        assert!(!a(at(5, 0)), "at end (exclusive)");
1989        assert!(!a(at(12, 0)), "midday outside");
1990        assert!(!a(at(21, 59)), "just before start");
1991    }
1992
1993    #[test]
1994    fn window_respects_tz() {
1995        // The same instant is inside the window under one tz and may
1996        // be outside under another. Compare UTC vs Local via the
1997        // host's own offset (kept CI-green on UTC runners like the
1998        // active tz test does).
1999        use chrono::TimeZone;
2000        let s = with_window("09:00-17:00");
2001        let noon_utc = chrono::Utc.with_ymd_and_hms(2026, 6, 9, 12, 0, 0).unwrap();
2002        // Under UTC, 12:00 is inside 09:00-17:00.
2003        assert!(s.constraints.allows(noon_utc, ScheduleTz::Utc));
2004        // Under Local, the verdict tracks the host wall-clock time;
2005        // assert it matches a direct wall_time membership check.
2006        let local_t = noon_utc.with_timezone(&chrono::Local).time();
2007        let in_local = local_t >= chrono::NaiveTime::from_hms_opt(9, 0, 0).unwrap()
2008            && local_t < chrono::NaiveTime::from_hms_opt(17, 0, 0).unwrap();
2009        assert_eq!(s.constraints.allows(noon_utc, ScheduleTz::Local), in_local);
2010    }
2011
2012    #[test]
2013    fn validate_accepts_good_window() {
2014        for w in ["09:00-17:00", "22:00-05:00", "00:00-23:59"] {
2015            with_window(w)
2016                .validate()
2017                .unwrap_or_else(|e| panic!("'{w}' should validate: {e}"));
2018        }
2019    }
2020
2021    #[test]
2022    fn validate_rejects_bad_window() {
2023        for bad in ["9-5", "22:00", "22:00-22:00", "25:00-05:00", "09:00_17:00"] {
2024            let err = with_window(bad).validate().unwrap_err();
2025            assert!(
2026                err.contains("constraints.window"),
2027                "for '{bad}', got: {err}"
2028            );
2029        }
2030    }
2031
2032    #[test]
2033    fn window_fail_closed_on_corrupt_blob() {
2034        // A malformed window (only reachable via a hand-edited KV
2035        // blob — validate() rejects it at create) must BLOCK, not
2036        // silently allow fires during a change-freeze (gemini #452).
2037        let s = with_window("22:00_05:00");
2038        assert!(
2039            !s.constraints.allows(chrono::Utc::now(), ScheduleTz::Utc),
2040            "corrupt window fails closed"
2041        );
2042        // …and the scheduler can surface why it's stuck.
2043        assert!(
2044            s.bad_window().is_some(),
2045            "bad_window reports the parse error"
2046        );
2047        assert!(with_window("22:00-05:00").bad_window().is_none());
2048    }
2049
2050    #[test]
2051    fn calendar_outside_window_is_flagged() {
2052        // at 09:00 can never fall in 22:00-05:00 → never fires.
2053        let mut s = schedule_with(calendar("09:00", &["mon-fri"]), RunsOn::Backend);
2054        s.constraints.window = Some("22:00-05:00".into());
2055        assert!(s.calendar_outside_window(), "09:00 is not in 22:00-05:00");
2056
2057        // at 23:00 IS inside the overnight window → fine.
2058        let mut s = schedule_with(calendar("23:00", &[]), RunsOn::Backend);
2059        s.constraints.window = Some("22:00-05:00".into());
2060        assert!(!s.calendar_outside_window(), "23:00 is in 22:00-05:00");
2061
2062        // reconcile shapes are never flagged (they poll every minute).
2063        let mut s = schedule_with(
2064            When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() })),
2065            RunsOn::Backend,
2066        );
2067        s.constraints.window = Some("22:00-05:00".into());
2068        assert!(!s.calendar_outside_window(), "reconcile is unaffected");
2069
2070        // no window → never flagged.
2071        let s = schedule_with(calendar("09:00", &[]), RunsOn::Backend);
2072        assert!(!s.calendar_outside_window());
2073    }
2074
2075    // ---- on_failure.retry (#418 Phase 4) ----
2076
2077    fn with_retry(max: u32, backoff: &str) -> Schedule {
2078        let mut s = schedule_with(
2079            When::PerPc(PerPolicy::Every(EverySpec { every: "6h".into() })),
2080            RunsOn::Backend,
2081        );
2082        s.on_failure.retry = Some(Retry {
2083            max,
2084            backoff: backoff.into(),
2085        });
2086        s
2087    }
2088
2089    #[test]
2090    fn on_failure_parses_and_round_trips() {
2091        let yaml = r#"
2092id: x
2093when:
2094  per_pc: { every: 6h }
2095job_id: y
2096target: { all: true }
2097on_failure:
2098  retry: { max: 3, backoff: 10m }
2099"#;
2100        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
2101        let r = s.on_failure.retry.as_ref().expect("retry present");
2102        assert_eq!(r.max, 3);
2103        assert_eq!(r.backoff, "10m");
2104        let back: Schedule =
2105            serde_json::from_str(&serde_json::to_string(&s).expect("ser")).expect("de");
2106        assert_eq!(back.on_failure, s.on_failure);
2107    }
2108
2109    #[test]
2110    fn on_failure_empty_is_skipped_when_serialising() {
2111        let s = schedule_with(
2112            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
2113            RunsOn::Backend,
2114        );
2115        let json = serde_json::to_value(&s).expect("serialise");
2116        assert!(
2117            json.get("on_failure").is_none(),
2118            "empty on_failure must not appear on the wire: {json}"
2119        );
2120    }
2121
2122    #[test]
2123    fn validate_accepts_good_retry() {
2124        for (max, backoff) in [(1, "30s"), (3, "10m"), (10, "1h")] {
2125            with_retry(max, backoff)
2126                .validate()
2127                .unwrap_or_else(|e| panic!("retry {{max:{max}, backoff:{backoff}}}: {e}"));
2128        }
2129    }
2130
2131    #[test]
2132    fn validate_rejects_bad_backoff() {
2133        let err = with_retry(3, "soon").validate().unwrap_err();
2134        assert!(err.contains("on_failure.retry.backoff"), "got: {err}");
2135    }
2136
2137    #[test]
2138    fn validate_rejects_sub_second_backoff() {
2139        // "500ms" parses as humantime but lowers to 0s on the wire —
2140        // reject it so the operator doesn't get a silent no-wait
2141        // (coderabbit #466).
2142        for bad in ["500ms", "0s", "999ms"] {
2143            let err = with_retry(3, bad).validate().unwrap_err();
2144            assert!(
2145                err.contains("on_failure.retry.backoff must be >= 1s"),
2146                "for '{bad}', got: {err}"
2147            );
2148        }
2149    }
2150
2151    #[test]
2152    fn validate_rejects_out_of_range_max() {
2153        for bad in [0u32, 11, 1000] {
2154            let err = with_retry(bad, "10m").validate().unwrap_err();
2155            assert!(
2156                err.contains("on_failure.retry.max"),
2157                "for max={bad}, got: {err}"
2158            );
2159        }
2160    }
2161
2162    #[test]
2163    fn lowered_retry_reduces_backoff_to_seconds() {
2164        let s = with_retry(3, "10m");
2165        let spec = s.on_failure.lowered_retry().expect("a retry policy");
2166        assert_eq!(spec.max, 3);
2167        assert_eq!(spec.backoff_secs, 600);
2168    }
2169
2170    #[test]
2171    fn lowered_retry_is_none_without_policy() {
2172        let s = schedule_with(
2173            When::PerPc(PerPolicy::Once(OnceLiteral::Once)),
2174            RunsOn::Backend,
2175        );
2176        assert!(s.on_failure.lowered_retry().is_none());
2177    }
2178
2179    #[test]
2180    fn shipped_schedule_configs_parse_and_validate() {
2181        // Every YAML under configs/schedules/ must parse with the
2182        // current Schedule serde AND pass validate() — keeps the
2183        // shipped examples from drifting out of sync with the model
2184        // (#418 removed back-compat, so drift = broken at create).
2185        let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../configs/schedules");
2186        let mut seen = 0;
2187        for entry in std::fs::read_dir(&dir).expect("read configs/schedules") {
2188            let path = entry.expect("dir entry").path();
2189            if path.extension().and_then(|e| e.to_str()) != Some("yaml") {
2190                continue;
2191            }
2192            let body = std::fs::read_to_string(&path).expect("read yaml");
2193            let s: Schedule = serde_yaml::from_str(&body)
2194                .unwrap_or_else(|e| panic!("{} failed to parse: {e}", path.display()));
2195            s.validate()
2196                .unwrap_or_else(|e| panic!("{} failed validate(): {e}", path.display()));
2197            seen += 1;
2198        }
2199        assert!(seen > 0, "no schedule YAMLs found in {}", dir.display());
2200    }
2201
2202    // ---- pre-existing enum wire formats (unchanged by #418) ----
2203
2204    #[test]
2205    fn exec_mode_serialises_snake_case() {
2206        for (mode, expected) in [
2207            (ExecMode::EveryTick, "every_tick"),
2208            (ExecMode::OncePerPc, "once_per_pc"),
2209            (ExecMode::OncePerTarget, "once_per_target"),
2210        ] {
2211            let s = serde_json::to_value(mode).expect("serialise");
2212            assert_eq!(s, serde_json::Value::String(expected.into()));
2213            let back: ExecMode = serde_json::from_value(serde_json::Value::String(expected.into()))
2214                .expect("deserialise");
2215            assert_eq!(back, mode, "round-trip for {expected}");
2216        }
2217    }
2218
2219    #[test]
2220    fn schedule_runs_on_defaults_to_backend() {
2221        let yaml = r#"
2222id: x
2223when:
2224  per_pc: once
2225job_id: y
2226target: { all: true }
2227"#;
2228        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
2229        assert_eq!(s.runs_on, RunsOn::Backend);
2230    }
2231
2232    #[test]
2233    fn schedule_runs_on_agent_parses() {
2234        let yaml = r#"
2235id: offline-inv
2236when:
2237  per_pc: { every: 1h }
2238job_id: inventory-hw
2239target: { all: true }
2240runs_on: agent
2241"#;
2242        let s: Schedule = serde_yaml::from_str(yaml).expect("parse");
2243        assert_eq!(s.runs_on, RunsOn::Agent);
2244        assert_eq!(s.lowered().mode, ExecMode::OncePerPc);
2245    }
2246
2247    #[test]
2248    fn runs_on_serialises_snake_case() {
2249        for (mode, expected) in [(RunsOn::Backend, "backend"), (RunsOn::Agent, "agent")] {
2250            let s = serde_json::to_value(mode).expect("serialise");
2251            assert_eq!(s, serde_json::Value::String(expected.into()));
2252            let back: RunsOn = serde_json::from_value(serde_json::Value::String(expected.into()))
2253                .expect("deserialise");
2254            assert_eq!(back, mode);
2255        }
2256    }
2257
2258    #[test]
2259    fn execute_shell_into_wire_shell() {
2260        assert_eq!(Shell::from(ExecuteShell::Powershell), Shell::Powershell);
2261        assert_eq!(Shell::from(ExecuteShell::Cmd), Shell::Cmd);
2262    }
2263
2264    #[test]
2265    fn manifest_staleness_defaults_to_cached() {
2266        let yaml = r#"
2267id: x
2268version: 1.0.0
2269execute:
2270  shell: powershell
2271  script: "echo"
2272  timeout: 1s
2273"#;
2274        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
2275        assert_eq!(m.staleness, Staleness::Cached);
2276    }
2277
2278    #[test]
2279    fn manifest_strict_staleness_parses() {
2280        let yaml = r#"
2281id: urgent-patch
2282version: 2.5.1
2283execute:
2284  shell: powershell
2285  script: Install-Hotfix
2286  timeout: 5m
2287staleness:
2288  mode: strict
2289  max_cache_age: 0s
2290"#;
2291        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
2292        match m.staleness {
2293            Staleness::Strict { max_cache_age } => assert_eq!(max_cache_age, "0s"),
2294            other => panic!("expected strict, got {other:?}"),
2295        }
2296    }
2297
2298    #[test]
2299    fn manifest_unchecked_staleness_parses() {
2300        let yaml = r#"
2301id: legacy
2302version: 0.1.0
2303execute:
2304  shell: cmd
2305  script: "echo"
2306  timeout: 1s
2307staleness:
2308  mode: unchecked
2309"#;
2310        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
2311        assert_eq!(m.staleness, Staleness::Unchecked);
2312    }
2313
2314    #[test]
2315    fn missing_required_field_errors() {
2316        // `id` missing.
2317        let yaml = r#"
2318version: 1.0.0
2319target: { all: true }
2320execute:
2321  shell: powershell
2322  script: "echo"
2323  timeout: 1s
2324"#;
2325        let r: Result<Manifest, _> = serde_yaml::from_str(yaml);
2326        assert!(r.is_err(), "expected error, got {:?}", r);
2327    }
2328
2329    #[test]
2330    fn display_field_table_kind_round_trips_with_nested_columns() {
2331        // #39: `type: table` + `columns:` on a DisplayField gets
2332        // round-tripped through serde so the SPA receives the
2333        // nested schema verbatim. Nested columns themselves are
2334        // DisplayFields so they can carry `type: bytes` /
2335        // `type: number` for cell formatting.
2336        let yaml = r#"
2337id: inv-hw
2338version: 1.0.0
2339execute:
2340  shell: powershell
2341  script: "echo"
2342  timeout: 60s
2343inventory:
2344  display:
2345    - field: hostname
2346      label: Hostname
2347    - field: disks
2348      label: Disks
2349      type: table
2350      columns:
2351        - field: device_id
2352          label: Drive
2353        - field: size_bytes
2354          label: Size
2355          type: bytes
2356        - field: free_bytes
2357          label: Free
2358          type: bytes
2359        - field: file_system
2360          label: FS
2361"#;
2362        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
2363        let inv = m.inventory.as_ref().expect("inventory hint");
2364        let disks = inv
2365            .display
2366            .iter()
2367            .find(|d| d.field == "disks")
2368            .expect("disks display row");
2369        assert_eq!(disks.kind.as_deref(), Some("table"));
2370        let cols = disks.columns.as_ref().expect("table needs columns");
2371        assert_eq!(cols.len(), 4);
2372        assert_eq!(cols[1].field, "size_bytes");
2373        assert_eq!(cols[1].kind.as_deref(), Some("bytes"));
2374    }
2375
2376    #[test]
2377    fn display_field_scalar_kind_keeps_columns_none() {
2378        // Defensive: when type is a scalar (`bytes` / `number` /
2379        // `timestamp`) the `columns` field stays None — the SPA
2380        // uses its presence as the "render nested table" signal,
2381        // so it must not leak in via serde defaults.
2382        let yaml = r#"
2383id: x
2384version: 1.0.0
2385execute:
2386  shell: powershell
2387  script: "echo"
2388  timeout: 5s
2389inventory:
2390  display:
2391    - { field: ram_bytes, label: RAM, type: bytes }
2392"#;
2393        let m: Manifest = serde_yaml::from_str(yaml).expect("parse");
2394        let inv = m.inventory.as_ref().unwrap();
2395        assert!(inv.display[0].columns.is_none());
2396    }
2397}
2398
2399/// Periodic schedule (spec §2.4.3). v0.18.0 carries the fanout plan
2400/// (target + optional rollout + optional jitter) inline; the
2401/// referenced job (`job_id` → [`BUCKET_JOBS`]) supplies only the
2402/// script body. Two schedules of the same job can target different
2403/// groups on different cadences without copying the manifest.
2404///
2405/// #418 Phase 1: the cadence is the single [`When`] field. The old
2406/// `cron` × `mode` × `cooldown` × `auto_disable_when_done` quartet
2407/// is gone (no back-compat — pre-Phase-1 KV blobs fail to parse and
2408/// are warn-skipped; re-`schedule create` to upgrade them). The
2409/// engine underneath is unchanged: [`Schedule::lowered`] maps `when`
2410/// onto the same (cron, ExecMode, cooldown) trio the scheduler and
2411/// `decide_fire` always ran on.
2412#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone)]
2413pub struct Schedule {
2414    pub id: String,
2415    /// When to fire — a reconcile cadence (`per_pc` / `per_target`)
2416    /// or a calendar time trigger (`at` / `days`). See [`When`].
2417    ///
2418    /// `singleton_map`: serde_yaml 0.9 renders externally-tagged
2419    /// enums as `!per_pc` YAML tags by default; this keeps the
2420    /// operator-facing map shape (`when: { per_pc: once }`). JSON
2421    /// output is identical either way, and the schemars schema
2422    /// (external tagging = oneOf of single-key objects) already
2423    /// matches the singleton-map wire shape.
2424    #[serde(with = "serde_yaml::with::singleton_map")]
2425    #[schemars(with = "When")]
2426    pub when: When,
2427    /// Key into [`crate::kv::BUCKET_JOBS`]. Must equal a registered
2428    /// Manifest's `id`.
2429    pub job_id: String,
2430    /// Who + how-to-phase + when-to-stagger. The Manifest doesn't
2431    /// carry these any more — same job + different fanout = different
2432    /// schedule.
2433    #[serde(flatten)]
2434    pub plan: FanoutPlan,
2435    /// Optional validity window. Outside `[from, until)` the
2436    /// schedule is dormant — still registered, still visible, but
2437    /// every tick is skipped (deleted ≠ dormant: a campaign that
2438    /// ended stays inspectable and can be re-armed by editing the
2439    /// window). Checked at tick time on both the backend scheduler
2440    /// and the agent's local scheduler.
2441    #[serde(default, skip_serializing_if = "Active::is_empty")]
2442    pub active: Active,
2443    /// #418 Phase 3: operational constraints gating *when within an
2444    /// active period* a fire may happen. Currently just `window`
2445    /// (a maintenance time-of-day window); future `require`
2446    /// (env gates) and `max_concurrent` land in the same namespace.
2447    /// Evaluated in the schedule's `tz` like the other wall-clock
2448    /// fields. Checked at tick time on both schedulers.
2449    #[serde(default, skip_serializing_if = "Constraints::is_empty")]
2450    pub constraints: Constraints,
2451    /// #418 Phase 4: what to do after a fire's script comes back
2452    /// failed. Currently just `retry` (fixed-backoff in-process
2453    /// re-run); future `notify` / `disable` join the same namespace.
2454    /// Applied fire-side in `handle_command` (the retry policy is
2455    /// lowered onto every Command this schedule produces), so it
2456    /// covers both `runs_on` locations.
2457    #[serde(default, skip_serializing_if = "OnFailure::is_empty")]
2458    pub on_failure: OnFailure,
2459    /// #418 Phase 2: the timezone this schedule's wall-clock fields
2460    /// are evaluated in — both the calendar `at` firing time AND the
2461    /// `active.{from,until}` window bounds. `local` (default) = the
2462    /// running host's TZ (the agent's for `runs_on: agent`, the
2463    /// backend server's otherwise); `utc` for TZ-independent
2464    /// schedules. Reconcile shapes (`per_pc`/`per_target`) ignore it
2465    /// for firing (poll cron runs every minute regardless) but still
2466    /// honor it for the `active` window.
2467    #[serde(default)]
2468    pub tz: ScheduleTz,
2469    /// v0.22: optional humantime window after a cron tick during
2470    /// which the Command is still considered "live". The scheduler
2471    /// computes `tick_at + starting_deadline` and stamps it onto
2472    /// each Command as `deadline_at`; agents skip Commands they
2473    /// receive after that absolute time. `None` (default) = no
2474    /// deadline, meaning a Command queued in the broker / stream
2475    /// during agent downtime runs whenever the agent reconnects —
2476    /// good for kitting / inventory / cleanup. Set this for
2477    /// time-of-day notifications, lunch reminders, etc., where
2478    /// "fire 3 hours late" would be wrong.
2479    #[serde(default, skip_serializing_if = "Option::is_none")]
2480    pub starting_deadline: Option<String>,
2481    /// v0.23: where does the cron tick happen? `Backend` (default,
2482    /// historical) = backend's scheduler fires Commands via NATS;
2483    /// agents passively receive. `Agent` = each targeted agent runs
2484    /// its own internal cron and fires locally, so the schedule
2485    /// keeps ticking even when the broker is unreachable (laptop on
2486    /// the train, broker maintenance window, full WAN outage). The
2487    /// two locations are mutually exclusive — when `Agent`, the
2488    /// backend scheduler stays out and just keeps the definition in
2489    /// KV for agents to read.
2490    #[serde(default)]
2491    pub runs_on: RunsOn,
2492    #[serde(default = "default_true")]
2493    pub enabled: bool,
2494}
2495
2496/// v0.23 — where the cron tick fires from.
2497#[derive(
2498    Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Copy, PartialEq, Eq, Default,
2499)]
2500#[serde(rename_all = "snake_case")]
2501pub enum RunsOn {
2502    /// Backend's central scheduler ticks and publishes Commands to
2503    /// NATS. Historical default, what every pre-v0.23 schedule
2504    /// uses. Agent offline ⇒ Command queued in STREAM_EXEC; agent
2505    /// reconnects ⇒ catch-up via [`command_replay`](crate)
2506    /// (see kanade-agent's command_replay module).
2507    #[default]
2508    Backend,
2509    /// Each targeted agent runs the cron tick locally. Survives
2510    /// broker / WAN outages. Best for laptops / mobile devices that
2511    /// roam off the corporate network. Agent must be online for the
2512    /// initial schedule + job-catalog pull, but once cached the
2513    /// agent fires the script standalone.
2514    Agent,
2515}
2516
2517/// Per-pc/per-target dedup semantics for a [`Schedule`] (v0.19).
2518#[derive(
2519    Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Copy, PartialEq, Eq, Default,
2520)]
2521#[serde(rename_all = "snake_case")]
2522pub enum ExecMode {
2523    /// Fire on every cron tick at the whole target. Historical
2524    /// (pre-v0.19) behavior; no dedup.
2525    #[default]
2526    EveryTick,
2527    /// Fire at each pc until that pc succeeds; then skip it until
2528    /// the optional cooldown elapses (or forever if no cooldown).
2529    /// Use for kitting / first-boot / per-pc compliance checks.
2530    OncePerPc,
2531    /// Fire at the whole target until **any** pc succeeds; then
2532    /// skip the whole target until the optional cooldown elapses
2533    /// (or forever if no cooldown). Use for "one delegate is
2534    /// enough" tasks like license check-in.
2535    OncePerTarget,
2536}
2537
2538/// #418 Phase 1 — the single "when does this fire" axis.
2539///
2540/// Replaces the old `cron` + `mode` + `cooldown` trio whose
2541/// interactions were implicit (cron doubled as both a real
2542/// time-of-day trigger and a reconcile poll period; contradictory
2543/// combinations silently no-opped). Two shapes:
2544///
2545/// * **reconcile** (`per_pc` / `per_target`) — desired-state: "each
2546///   pc (or one delegate) should have run this within `every`".
2547///   The poll period is system-generated ([`POLL_CRON`], every
2548///   minute) and no longer the operator's concern.
2549/// * **calendar** (`{ at, days }`) — a wall-clock time trigger
2550///   (#418 Phase 2, replacing the old raw-cron escape hatch). Fires
2551///   the whole target at the given time, no dedup. `at: "09:00"` +
2552///   `days` repeats; `at: "2026-06-10 09:00"` (a date+time) fires
2553///   exactly once. Evaluated in the schedule's top-level `tz`.
2554#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, PartialEq, Eq)]
2555#[serde(rename_all = "snake_case")]
2556pub enum When {
2557    /// Fire at each targeted pc: `once` (kitting — succeed once,
2558    /// skip forever, forever catching brand-new / re-imaged pcs)
2559    /// or `{ every: <humantime> }` (patrol — re-arm per pc after
2560    /// the interval).
2561    PerPc(PerPolicy),
2562    /// Fire until **any** one pc of the target succeeds, then skip
2563    /// the whole target (`once`) or re-arm after `every`. Needs
2564    /// fleet-wide completion data, so it is backend-only —
2565    /// `runs_on: agent` + `per_target` is rejected by
2566    /// [`Schedule::validate`].
2567    PerTarget(PerPolicy),
2568    /// Calendar time trigger: `{ at: "09:00", days: [mon-fri] }`
2569    /// (repeating) or `{ at: "2026-06-10 09:00" }` (one-shot). Fires
2570    /// the whole target at that wall-clock time in the schedule's
2571    /// `tz` — no dedup, no cooldown.
2572    Calendar(CalendarSpec),
2573}
2574
2575/// Calendar time trigger (#418 Phase 2). `at` is either a time of
2576/// day (`"HH:MM"`, repeating — combine with `days`) or a full
2577/// date+time (`"YYYY-MM-DD HH:MM"`, a one-shot that fires once and
2578/// never again). Evaluated in the schedule's top-level `tz`.
2579#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, PartialEq, Eq)]
2580#[serde(deny_unknown_fields)]
2581pub struct CalendarSpec {
2582    /// `"HH:MM"` (24h) for a repeating trigger, or
2583    /// `"YYYY-MM-DD HH:MM"` (hyphen / slash / `T` separators all
2584    /// accepted) for a one-shot. Parsed lazily —
2585    /// [`Schedule::validate`] rejects garbage at create time.
2586    pub at: String,
2587    /// Day-of-week filter for a time-of-day `at`: `["mon-fri"]`,
2588    /// `["mon","wed","fri"]`, … (passed verbatim to the cron DOW
2589    /// field, so ranges and names both work). Empty = every day.
2590    /// Must be empty when `at` carries a date (the date already
2591    /// pins the day).
2592    #[serde(default, skip_serializing_if = "Vec::is_empty")]
2593    pub days: Vec<String>,
2594}
2595
2596/// Parsed `CalendarSpec.at`: the wall-clock minute/hour, plus the
2597/// date for a one-shot (`None` = repeating time-of-day).
2598struct ParsedAt {
2599    minute: u32,
2600    hour: u32,
2601    date: Option<chrono::NaiveDate>,
2602}
2603
2604impl CalendarSpec {
2605    /// Parse `at`: a date+time (`YYYY-MM-DD HH:MM`, hyphen / slash /
2606    /// `T` separators) is a one-shot; a bare `HH:MM` is repeating.
2607    fn parse_at(&self) -> Result<ParsedAt, String> {
2608        use chrono::Timelike;
2609        let s = self.at.trim();
2610        for fmt in ["%Y-%m-%d %H:%M", "%Y-%m-%dT%H:%M", "%Y/%m/%d %H:%M"] {
2611            if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(s, fmt) {
2612                return Ok(ParsedAt {
2613                    minute: dt.minute(),
2614                    hour: dt.hour(),
2615                    date: Some(dt.date()),
2616                });
2617            }
2618        }
2619        if let Ok(t) = chrono::NaiveTime::parse_from_str(s, "%H:%M") {
2620            return Ok(ParsedAt {
2621                minute: t.minute(),
2622                hour: t.hour(),
2623                date: None,
2624            });
2625        }
2626        Err(format!(
2627            "when.at: unparseable '{}' (want HH:MM or YYYY-MM-DD HH:MM)",
2628            self.at
2629        ))
2630    }
2631
2632    /// Pre-flight check on the `days` tokens so a bad day name gives
2633    /// a `when.days:`-scoped error instead of croner's confusing
2634    /// "when.at lowered to invalid cron" (claude #432 review). Each
2635    /// token is a day name (`mon`..`sun`), a numeric DOW (`0`..`7`),
2636    /// `*`, or a `-` range of those.
2637    fn validate_days(&self) -> Result<(), String> {
2638        const NAMES: [&str; 7] = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"];
2639        for tok in &self.days {
2640            // Report the whole token on a malformed range like `mon-`
2641            // (which would otherwise split to a cryptic empty part —
2642            // claude #432 follow-up).
2643            let invalid = |reason: &str| {
2644                Err(format!(
2645                    "when.days: invalid day token '{tok}' ({reason}; \
2646                     want mon..sun, 0-7, a range like mon-fri, or *)"
2647                ))
2648            };
2649            for part in tok.split('-') {
2650                let p = part.trim().to_ascii_lowercase();
2651                if p.is_empty() {
2652                    return invalid("empty range bound");
2653                }
2654                let ok = p == "*"
2655                    || NAMES.contains(&p.as_str())
2656                    || p.parse::<u8>().map(|n| n <= 7).unwrap_or(false);
2657                if !ok {
2658                    return invalid(&format!("'{part}' is not a day"));
2659                }
2660            }
2661        }
2662        Ok(())
2663    }
2664
2665    /// For a one-shot (`at` carries a date), the absolute instant it
2666    /// fires in `tz`. `None` for a repeating calendar. Used to warn
2667    /// about a one-shot whose date is already in the past (it would
2668    /// never fire).
2669    pub fn oneshot_instant(&self, tz: ScheduleTz) -> Option<chrono::DateTime<chrono::Utc>> {
2670        let p = self.parse_at().ok()?;
2671        let date = p.date?;
2672        let naive = date.and_hms_opt(p.hour, p.minute, 0)?;
2673        tz.naive_to_utc(naive)
2674    }
2675
2676    /// The wall-clock time-of-day this calendar fires at (`None` if
2677    /// `at` is unparseable — validate() guards that). Used to detect
2678    /// a calendar whose fire time can never fall inside its
2679    /// `constraints.window` (claude #452 review).
2680    pub fn fire_time(&self) -> Option<chrono::NaiveTime> {
2681        let p = self.parse_at().ok()?;
2682        chrono::NaiveTime::from_hms_opt(p.hour, p.minute, 0)
2683    }
2684
2685    /// Lower to the cron string the scheduler engine runs. Repeating
2686    /// → 6-field `0 {min} {hour} * * {dow}`; one-shot → 7-field
2687    /// `0 {min} {hour} {day} {month} * {year}` (a past year never
2688    /// fires — that's what makes it one-shot).
2689    fn to_cron(&self) -> Result<String, String> {
2690        use chrono::Datelike;
2691        let ParsedAt { minute, hour, date } = self.parse_at()?;
2692        match date {
2693            Some(d) => {
2694                if !self.days.is_empty() {
2695                    return Err(
2696                        "when.at with a date is a one-shot and cannot be combined with days".into(),
2697                    );
2698                }
2699                Ok(format!(
2700                    "0 {minute} {hour} {} {} * {}",
2701                    d.day(),
2702                    d.month(),
2703                    d.year()
2704                ))
2705            }
2706            None => {
2707                let dow = if self.days.is_empty() {
2708                    "*".to_string()
2709                } else {
2710                    self.validate_days()?;
2711                    self.days.join(",")
2712                };
2713                Ok(format!("0 {minute} {hour} * * {dow}"))
2714            }
2715        }
2716    }
2717}
2718
2719/// The timezone a schedule's wall-clock fields (`when.at`,
2720/// `active.{from,until}`) are evaluated in (#418 Phase 2).
2721#[derive(
2722    Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Copy, PartialEq, Eq, Default,
2723)]
2724#[serde(rename_all = "snake_case")]
2725pub enum ScheduleTz {
2726    /// The running host's local timezone — the agent's for
2727    /// `runs_on: agent`, the backend server's otherwise. Default.
2728    #[default]
2729    Local,
2730    /// UTC — for timezone-independent schedules.
2731    Utc,
2732}
2733
2734impl ScheduleTz {
2735    /// Interpret a naive (zoneless) datetime as being in this tz and
2736    /// convert to UTC. On a DST *fold* (the local time occurs twice
2737    /// when clocks go back) we pick `.earliest()` rather than
2738    /// rejecting it; `None` is reserved for a true DST *gap* (a local
2739    /// time that never exists). `Utc` is fixed-offset so neither ever
2740    /// happens; `Local` is whatever timezone the running host is set
2741    /// to and *can* hit a gap/fold on any DST-observing host — not
2742    /// just the JST we run today (gemini + claude #432 review).
2743    fn naive_to_utc(self, naive: chrono::NaiveDateTime) -> Option<chrono::DateTime<chrono::Utc>> {
2744        use chrono::TimeZone;
2745        match self {
2746            ScheduleTz::Utc => Some(chrono::DateTime::from_naive_utc_and_offset(
2747                naive,
2748                chrono::Utc,
2749            )),
2750            ScheduleTz::Local => chrono::Local
2751                .from_local_datetime(&naive)
2752                .earliest()
2753                .map(|dt| dt.with_timezone(&chrono::Utc)),
2754        }
2755    }
2756
2757    /// The wall-clock time-of-day `now` reads as in this tz — used by
2758    /// [`Constraints::allows`] to test a maintenance window
2759    /// (#418 Phase 3). `Utc` is the naive UTC time; `Local` is the
2760    /// running host's local time.
2761    fn wall_time(self, now: chrono::DateTime<chrono::Utc>) -> chrono::NaiveTime {
2762        match self {
2763            ScheduleTz::Utc => now.time(),
2764            ScheduleTz::Local => now.with_timezone(&chrono::Local).time(),
2765        }
2766    }
2767}
2768
2769/// `once` vs `{ every: <humantime> }` — shared by `per_pc` /
2770/// `per_target`. Untagged so the YAML stays the bare keyword or a
2771/// one-key map, nothing more ceremonial.
2772#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, PartialEq, Eq)]
2773#[serde(untagged)]
2774pub enum PerPolicy {
2775    /// The bare string `once`: succeed once, then skip permanently
2776    /// (cooldown = infinity).
2777    Once(OnceLiteral),
2778    /// Re-arm after the humantime interval, e.g. `{ every: 6h }`.
2779    Every(EverySpec),
2780}
2781
2782/// Single-variant enum so serde accepts exactly the string `once`
2783/// (a free-form `String` would swallow typos like `onec`).
2784#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Copy, PartialEq, Eq)]
2785#[serde(rename_all = "snake_case")]
2786pub enum OnceLiteral {
2787    Once,
2788}
2789
2790/// `{ every: <humantime> }`. Standalone struct (not an inline
2791/// struct variant) so `deny_unknown_fields` still bites under the
2792/// untagged [`PerPolicy`] — `{ evry: 6h }` is a parse error, not a
2793/// silently-ignored key.
2794#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, PartialEq, Eq)]
2795#[serde(deny_unknown_fields)]
2796pub struct EverySpec {
2797    /// Humantime interval (`10m`, `6h`, `1d`...). Parsed lazily —
2798    /// [`Schedule::validate`] rejects garbage at create time.
2799    pub every: String,
2800}
2801
2802impl PerPolicy {
2803    /// The cooldown this policy lowers to: `once` = `None`
2804    /// (permanent skip), `every` = the interval.
2805    fn cooldown(&self) -> Option<String> {
2806        match self {
2807            PerPolicy::Once(_) => None,
2808            PerPolicy::Every(EverySpec { every }) => Some(every.clone()),
2809        }
2810    }
2811}
2812
2813impl std::fmt::Display for When {
2814    /// Operator-facing one-liner (`per_pc once` / `per_pc every 6h`
2815    /// / `at 09:00 [mon-fri]` / `at 2026-06-10 09:00`) for log
2816    /// lines, audit payloads and the API's `ScheduleSummary`.
2817    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2818        let policy = |p: &PerPolicy| match p {
2819            PerPolicy::Once(_) => "once".to_string(),
2820            PerPolicy::Every(EverySpec { every }) => format!("every {every}"),
2821        };
2822        match self {
2823            When::PerPc(p) => write!(f, "per_pc {}", policy(p)),
2824            When::PerTarget(p) => write!(f, "per_target {}", policy(p)),
2825            When::Calendar(c) if c.days.is_empty() => write!(f, "at {}", c.at),
2826            When::Calendar(c) => write!(f, "at {} [{}]", c.at, c.days.join(",")),
2827        }
2828    }
2829}
2830
2831/// Optional validity window for a [`Schedule`] (#418 decision G).
2832/// Half-open `[from, until)`; either bound may be omitted. Bounds
2833/// are `YYYY-MM-DD` (= that day's 00:00 in the schedule's `tz`) or
2834/// full RFC3339 (offset is honored as-is, `tz` ignored). Kept as
2835/// strings so the JSON Schema the SPA editor consumes stays two
2836/// plain string fields, mirroring `jitter` / `starting_deadline`.
2837///
2838/// #418 Phase 2: bounds are evaluated in the schedule's top-level
2839/// `tz` (was UTC-only in Phase 1) so `tz: local` makes both the
2840/// calendar `at` AND the `active` window local — one consistent
2841/// timezone per schedule.
2842#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Default, PartialEq, Eq)]
2843#[serde(deny_unknown_fields)]
2844pub struct Active {
2845    /// Dormant before this instant.
2846    #[serde(default, skip_serializing_if = "Option::is_none")]
2847    pub from: Option<String>,
2848    /// Dormant from this instant on (exclusive).
2849    #[serde(default, skip_serializing_if = "Option::is_none")]
2850    pub until: Option<String>,
2851}
2852
2853impl Active {
2854    /// `skip_serializing_if` helper — an empty window means "always
2855    /// active" and is omitted from the wire format entirely.
2856    pub fn is_empty(&self) -> bool {
2857        self.from.is_none() && self.until.is_none()
2858    }
2859
2860    /// Parse one bound: RFC3339 first (offset honored, `tz`
2861    /// ignored), then bare `YYYY-MM-DD` (00:00 in `tz`).
2862    pub fn parse_bound(s: &str, tz: ScheduleTz) -> Result<chrono::DateTime<chrono::Utc>, String> {
2863        if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(s) {
2864            return Ok(dt.with_timezone(&chrono::Utc));
2865        }
2866        if let Ok(d) = chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d") {
2867            let midnight = d.and_hms_opt(0, 0, 0).expect("00:00:00 is always valid");
2868            return tz.naive_to_utc(midnight).ok_or_else(|| {
2869                format!("active: bound '{s}' falls in a DST gap for the schedule's tz")
2870            });
2871        }
2872        Err(format!(
2873            "active: unparseable bound '{s}' (want YYYY-MM-DD or RFC3339)"
2874        ))
2875    }
2876
2877    /// Is `now` inside the window? Unparseable bounds are treated
2878    /// as absent here (fail-open) — [`Schedule::validate`] is the
2879    /// place that rejects them loudly; this runs on every tick and
2880    /// must never panic on a stale KV blob.
2881    pub fn contains(&self, now: chrono::DateTime<chrono::Utc>, tz: ScheduleTz) -> bool {
2882        let bound = |s: &Option<String>| s.as_deref().and_then(|s| Self::parse_bound(s, tz).ok());
2883        if bound(&self.from).is_some_and(|from| now < from) {
2884            return false;
2885        }
2886        if bound(&self.until).is_some_and(|until| now >= until) {
2887            return false;
2888        }
2889        true
2890    }
2891}
2892
2893/// Operational constraints on a [`Schedule`] (#418 Phase 3). Where
2894/// [`Active`] decides *over what date range* a schedule is live,
2895/// `Constraints` decides *when, within an active period,* a fire is
2896/// allowed. Only `window` (a maintenance time-of-day window) so far;
2897/// `require` (env gates) and `max_concurrent` will join this struct
2898/// in later phases.
2899#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Default, PartialEq, Eq)]
2900#[serde(deny_unknown_fields)]
2901pub struct Constraints {
2902    /// `"HH:MM-HH:MM"` wall-clock window (evaluated in the schedule's
2903    /// `tz`). Fires outside it are skipped — mainly for reconcile
2904    /// cadences ("patrol every 6h, but only fire overnight") and
2905    /// daytime change-freezes. `start > end` crosses midnight
2906    /// (`"22:00-05:00"` = 22:00 through 05:00 next morning). Parsed
2907    /// lazily; [`Schedule::validate`] rejects garbage at create time.
2908    #[serde(default, skip_serializing_if = "Option::is_none")]
2909    pub window: Option<String>,
2910}
2911
2912impl Constraints {
2913    /// `skip_serializing_if` helper — empty constraints are omitted
2914    /// from the wire format entirely.
2915    pub fn is_empty(&self) -> bool {
2916        self.window.is_none()
2917    }
2918
2919    /// Parse `"HH:MM-HH:MM"` into `(start, end)`. Equal bounds are an
2920    /// error (a zero-width or all-day window is ambiguous — write no
2921    /// window for "always").
2922    pub fn parse_window(s: &str) -> Result<(chrono::NaiveTime, chrono::NaiveTime), String> {
2923        let (a, b) = s
2924            .split_once('-')
2925            .ok_or_else(|| format!("constraints.window: '{s}' must be 'HH:MM-HH:MM'"))?;
2926        let parse = |part: &str| {
2927            chrono::NaiveTime::parse_from_str(part.trim(), "%H:%M")
2928                .map_err(|e| format!("constraints.window: invalid time '{}': {e}", part.trim()))
2929        };
2930        let (start, end) = (parse(a)?, parse(b)?);
2931        if start == end {
2932            return Err(format!(
2933                "constraints.window: start and end are equal ('{s}'); omit window for 'always'"
2934            ));
2935        }
2936        Ok((start, end))
2937    }
2938
2939    /// Is a fire allowed at `now` (evaluated in `tz`)? No window =
2940    /// always allowed. Half-open `[start, end)`; `start > end`
2941    /// crosses midnight.
2942    ///
2943    /// **Fail-closed** on an unparseable window (returns `false`,
2944    /// gemini #452 review): a window is a *restrictive* constraint
2945    /// (change-freeze / overnight-only), so a corrupt one must NOT
2946    /// silently allow fires during the restricted hours. Bad windows
2947    /// are rejected at create time by [`Schedule::validate`]; this
2948    /// only bites a hand-edited KV blob, where blocking is the safe
2949    /// direction. The scheduler warns at register time
2950    /// ([`Schedule::bad_window`]) so a stuck schedule is diagnosable.
2951    /// The tick path never panics regardless.
2952    pub fn allows(&self, now: chrono::DateTime<chrono::Utc>, tz: ScheduleTz) -> bool {
2953        match self.window.as_deref() {
2954            // No window → always allowed.
2955            None => true,
2956            // Window set: membership, or fail-closed if unparseable
2957            // (`window_contains` returns None for a corrupt window).
2958            Some(_) => self.window_contains(tz.wall_time(now)).unwrap_or(false),
2959        }
2960    }
2961
2962    /// Membership of a wall-clock time-of-day in the window. `None`
2963    /// when there is no window or it's unparseable (callers decide
2964    /// the failure direction). `start > end` crosses midnight.
2965    fn window_contains(&self, t: chrono::NaiveTime) -> Option<bool> {
2966        let (start, end) = Self::parse_window(self.window.as_deref()?).ok()?;
2967        Some(if start <= end {
2968            start <= t && t < end
2969        } else {
2970            t >= start || t < end
2971        })
2972    }
2973}
2974
2975/// What to do when a fire's script fails (#418 Phase 4 — the "高"
2976/// retry/backoff gap). Where [`Constraints`] gates *whether* a fire
2977/// happens, `OnFailure` decides what happens *after* one ran and
2978/// came back bad. Only `retry` so far; future `notify` / `disable`
2979/// would join the same namespace.
2980#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, Default, PartialEq, Eq)]
2981#[serde(deny_unknown_fields)]
2982pub struct OnFailure {
2983    /// Re-run the script in-process when it exits non-zero (or times
2984    /// out), up to a cap, with a fixed backoff between attempts.
2985    /// `None` (default) = no retry: a failed run is published as-is
2986    /// and (for reconcile cadences) simply re-fires on the next poll
2987    /// tick. See [`Retry`].
2988    #[serde(default, skip_serializing_if = "Option::is_none")]
2989    pub retry: Option<Retry>,
2990}
2991
2992impl OnFailure {
2993    /// `skip_serializing_if` helper — an empty policy is omitted from
2994    /// the wire format entirely.
2995    pub fn is_empty(&self) -> bool {
2996        self.retry.is_none()
2997    }
2998
2999    /// Lower the operator-facing `retry` (humantime backoff) onto the
3000    /// engine vocabulary the agent's executor runs on (backoff in
3001    /// whole seconds). Single seam shared by the backend command
3002    /// builder and the agent's local scheduler so the two stamp the
3003    /// same [`crate::wire::RetrySpec`] onto every Command. Returns
3004    /// `None` when there is no retry policy or the backoff is
3005    /// unparseable (validate() rejects the latter at create time;
3006    /// this stays fail-safe = "no retry" for a hand-edited KV blob
3007    /// rather than panicking on the fire path).
3008    pub fn lowered_retry(&self) -> Option<crate::wire::RetrySpec> {
3009        let r = self.retry.as_ref()?;
3010        let backoff_secs = humantime::parse_duration(&r.backoff).ok()?.as_secs();
3011        Some(crate::wire::RetrySpec {
3012            max: r.max,
3013            backoff_secs,
3014        })
3015    }
3016}
3017
3018/// Fixed-backoff retry policy (#418 Phase 4). `max` is the number of
3019/// *additional* attempts after the first run (so `max: 3` = up to 4
3020/// total executions); `backoff` is the humantime delay slept between
3021/// attempts. The retry happens fire-side (inside `kanade fire` /
3022/// `handle_command`) on every OS for the PoC — the Windows-native
3023/// "restart on failure" Task Scheduler path is deferred to the
3024/// native-delegation phase (#418 decision H).
3025#[derive(Serialize, Deserialize, schemars::JsonSchema, Debug, Clone, PartialEq, Eq)]
3026#[serde(deny_unknown_fields)]
3027pub struct Retry {
3028    /// Max additional attempts after the first failure. Bounded
3029    /// `1..=10` by [`Schedule::validate`] — a typo'd `max: 1000`
3030    /// with a short backoff would otherwise pin a flapping script in
3031    /// a tight loop for the whole window.
3032    pub max: u32,
3033    /// Humantime delay slept between attempts (`"10m"`, `"30s"`).
3034    pub backoff: String,
3035}
3036
/// System-generated poll cadence that every reconcile-shaped `when`
/// lowers to: a six-field cron (seconds first) that fires at second 0
/// of every minute.
///
/// Operators never write this value. The actual spacing between runs
/// comes from the `every` cooldown; this cron only bounds how quickly
/// the scheduler notices that something has become due (#418
/// decision B removed the poll period from the operator surface).
pub const POLL_CRON: &str = "0 * * * * *";
3043
3044/// What a [`When`] lowers to — the exact (cron, mode, cooldown)
3045/// trio the pre-#418 engine ran on. Keeping the engine vocabulary
3046/// unchanged is what lets Phase 1 swap the operator surface without
3047/// touching the tick / dedup machinery.
3048pub struct Lowered {
3049    /// Cron handed to `tokio-cron-scheduler` — [`POLL_CRON`] for
3050    /// reconcile shapes, a 6/7-field cron for calendar shapes.
3051    pub cron: String,
3052    /// Dedup semantics for `decide_fire`.
3053    pub mode: ExecMode,
3054    /// Humantime re-arm interval (`None` = succeed once, skip
3055    /// forever).
3056    pub cooldown: Option<String>,
3057    /// Timezone to evaluate `cron` in (#418 Phase 2). The scheduler
3058    /// passes this to `Job::new_async_tz`. Reconcile shapes carry
3059    /// the schedule's tz too even though POLL_CRON is tz-agnostic,
3060    /// so the same value drives the `active`-window check.
3061    pub tz: ScheduleTz,
3062}
3063
3064impl Schedule {
3065    /// The error message if this schedule's `constraints.window` is
3066    /// set but unparseable, else `None`. The scheduler logs this at
3067    /// register time so a fail-closed (never-firing) schedule from a
3068    /// hand-edited KV blob is diagnosable (gemini #452 review).
3069    pub fn bad_window(&self) -> Option<String> {
3070        let w = self.constraints.window.as_deref()?;
3071        Constraints::parse_window(w).err()
3072    }
3073
3074    /// True when this is a `calendar` schedule whose fire time can
3075    /// never fall inside its `constraints.window` — the cron fires,
3076    /// the window check rejects it, and (firing only at that
3077    /// time-of-day) it effectively never runs. An easy misconfig to
3078    /// set up by accident; the scheduler warns at register time
3079    /// (claude #452 review). Reconcile shapes poll every minute, so
3080    /// they always catch the window opening and aren't affected.
3081    pub fn calendar_outside_window(&self) -> bool {
3082        let When::Calendar(c) = &self.when else {
3083            return false;
3084        };
3085        let Some(t) = c.fire_time() else {
3086            return false;
3087        };
3088        matches!(self.constraints.window_contains(t), Some(false))
3089    }
3090
3091    /// Lower the operator-facing `when` onto the engine vocabulary.
3092    /// Single seam shared by the backend scheduler and the agent's
3093    /// local scheduler so the two can never drift.
3094    pub fn lowered(&self) -> Lowered {
3095        let tz = self.tz;
3096        match &self.when {
3097            When::PerPc(p) => Lowered {
3098                cron: POLL_CRON.into(),
3099                mode: ExecMode::OncePerPc,
3100                cooldown: p.cooldown(),
3101                tz,
3102            },
3103            When::PerTarget(p) => Lowered {
3104                cron: POLL_CRON.into(),
3105                mode: ExecMode::OncePerTarget,
3106                cooldown: p.cooldown(),
3107                tz,
3108            },
3109            // `to_cron` only fails on a malformed `at` (rejected by
3110            // validate() at create time). For a hand-edited KV blob
3111            // that slipped past, emit a deliberately-invalid cron so
3112            // register()'s Job::new_async_tz fails → warn+skip,
3113            // rather than firing at the wrong time.
3114            When::Calendar(c) => Lowered {
3115                cron: c
3116                    .to_cron()
3117                    .unwrap_or_else(|_| "# invalid calendar at".into()),
3118                mode: ExecMode::EveryTick,
3119                cooldown: None,
3120                tz,
3121            },
3122        }
3123    }
3124
3125    /// Cross-field semantic checks that don't fit pure serde derive
3126    /// — the [`Manifest::validate`] counterpart (#418 decision F;
3127    /// pre-Phase-1 a broken schedule was accepted at create time
3128    /// and silently warn-skipped at tick time). Run at every create
3129    /// site: `kanade schedule create` (client-side) and
3130    /// `POST /api/schedules`. The job_id-exists check lives in the
3131    /// API handler instead — it needs the JOBS KV.
3132    pub fn validate(&self) -> Result<(), String> {
3133        if matches!(self.runs_on, RunsOn::Agent) && matches!(self.when, When::PerTarget(_)) {
3134            return Err(
3135                "when.per_target needs fleet-wide completion data and is backend-only; \
3136                 it cannot be combined with runs_on: agent (each agent self-schedules, \
3137                 so per-target dedup would be deduping across a target of 1)"
3138                    .into(),
3139            );
3140        }
3141        if let Some(cd) = self.lowered().cooldown.as_deref() {
3142            humantime::parse_duration(cd)
3143                .map_err(|e| format!("when.every: invalid duration '{cd}': {e}"))?;
3144        }
3145        if let When::Calendar(c) = &self.when {
3146            // Lower the calendar form to its cron (catches a bad `at`
3147            // and the date+days conflict), then validate that cron
3148            // with the same parser configuration tokio-cron-scheduler
3149            // 0.15 uses internally (croner, seconds required,
3150            // DOM-and-DOW both honored, year optional) — create-time
3151            // validation can never accept what register() rejects.
3152            let cron = c.to_cron()?;
3153            croner::parser::CronParser::builder()
3154                .seconds(croner::parser::Seconds::Required)
3155                .dom_and_dow(true)
3156                .build()
3157                .parse(&cron)
3158                .map_err(|e| format!("when.at lowered to invalid cron '{cron}': {e}"))?;
3159        }
3160        // The other humantime strings on the schedule (claude #419
3161        // review): runtime degrades gracefully on both (bad jitter →
3162        // silent no-op, bad starting_deadline → warn + skipped tick),
3163        // but "rejected at create time" should cover every field the
3164        // operator can typo, not just `when`.
3165        if let Some(j) = &self.plan.jitter {
3166            humantime::parse_duration(j)
3167                .map_err(|e| format!("jitter: invalid duration '{j}': {e}"))?;
3168        }
3169        if let Some(sd) = &self.starting_deadline {
3170            humantime::parse_duration(sd)
3171                .map_err(|e| format!("starting_deadline: invalid duration '{sd}': {e}"))?;
3172        }
3173        let from = self
3174            .active
3175            .from
3176            .as_deref()
3177            .map(|s| Active::parse_bound(s, self.tz))
3178            .transpose()?;
3179        let until = self
3180            .active
3181            .until
3182            .as_deref()
3183            .map(|s| Active::parse_bound(s, self.tz))
3184            .transpose()?;
3185        if let (Some(f), Some(u)) = (from, until) {
3186            if f >= u {
3187                return Err(format!(
3188                    "active.from ({}) must be strictly before active.until ({})",
3189                    self.active.from.as_deref().unwrap_or_default(),
3190                    self.active.until.as_deref().unwrap_or_default(),
3191                ));
3192            }
3193        }
3194        // #418 Phase 3: a bad maintenance window is rejected at create
3195        // time (parse_window also catches equal bounds).
3196        if let Some(w) = self.constraints.window.as_deref() {
3197            Constraints::parse_window(w)?;
3198        }
3199        // #418 Phase 4: a bad on_failure.retry is rejected at create
3200        // time — backoff must be valid humantime, and max is bounded
3201        // so a typo can't pin a flapping script in a tight loop.
3202        if let Some(r) = &self.on_failure.retry {
3203            let backoff = humantime::parse_duration(&r.backoff).map_err(|e| {
3204                format!(
3205                    "on_failure.retry.backoff: invalid duration '{}': {e}",
3206                    r.backoff
3207                )
3208            })?;
3209            // The wire form lowers backoff to whole seconds, so a
3210            // sub-second value would silently become a 0s no-wait
3211            // (coderabbit #466). Reject it rather than honour a backoff
3212            // the operator can't actually get.
3213            if backoff.as_secs() < 1 {
3214                return Err(format!(
3215                    "on_failure.retry.backoff must be >= 1s (got '{}'); sub-second backoffs \
3216                     round to 0 on the wire",
3217                    r.backoff
3218                ));
3219            }
3220            if !(1..=10).contains(&r.max) {
3221                return Err(format!(
3222                    "on_failure.retry.max must be 1..=10 (got {}); it counts additional \
3223                     attempts after the first run",
3224                    r.max
3225                ));
3226            }
3227        }
3228        Ok(())
3229    }
3230}
3231
/// Always returns `true`.
///
/// Presumably a `#[serde(default = "default_true")]` provider for
/// boolean fields that should default to on — confirm against its
/// usages elsewhere in the file.
fn default_true() -> bool {
    true
}