Skip to main content

codewhale_protocol/
fleet.rs

1//! Agent Fleet control-plane protocol types.
2//!
3//! These types define the durable, serializable contract between the fleet
4//! manager, workers, CLI/TUI surfaces, and the Runtime API. They are
5//! intentionally additive: existing runtime-event consumers ignore unknown
6//! fields and are unaffected by fleet extensions.
7//!
8//! See:
9//! - <https://github.com/Hmbown/CodeWhale/issues/3154> (Agent Fleet control plane)
10//! - <https://github.com/Hmbown/CodeWhale/issues/3096> (Runtime API sub-agent direction)
11
12use std::collections::BTreeMap;
13use std::path::PathBuf;
14
15use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
16use serde_json::Value;
17
/// Version string of the fleet protocol types defined in this module.
// NOTE(review): nothing in this file reads the constant; how it is exchanged
// or compared between peers must be confirmed against its callers.
pub const FLEET_PROTOCOL_VERSION: &str = "0.1.0";
19
20/// Globally unique identifier for a fleet run.
21#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
22pub struct FleetRunId(pub String);
23
24impl From<String> for FleetRunId {
25    fn from(value: String) -> Self {
26        Self(value)
27    }
28}
29
30impl From<&str> for FleetRunId {
31    fn from(value: &str) -> Self {
32        Self(value.to_string())
33    }
34}
35
/// Top-level fleet run handle.
///
/// Bundles a run's identity, lifecycle status, task and worker
/// specifications, labels, optional security policy and timestamps into one
/// serializable record.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetRun {
    /// Identifier of this run.
    pub id: FleetRunId,
    /// Name of the run.
    pub name: String,
    /// Current lifecycle status of the run.
    pub status: FleetRunStatus,
    /// Task specifications belonging to the run. An absent field
    /// deserializes as an empty list.
    #[serde(default)]
    pub task_specs: Vec<FleetTaskSpec>,
    /// Worker specifications belonging to the run. An absent field
    /// deserializes as an empty list.
    #[serde(default)]
    pub worker_specs: Vec<FleetWorkerSpec>,
    /// String key/value labels. An absent field deserializes as an empty map.
    #[serde(default)]
    pub labels: BTreeMap<String, String>,
    /// Optional security policy; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub security_policy: Option<FleetSecurityPolicy>,
    /// Creation timestamp as a string. The tests in this file use values such
    /// as `2026-06-12T17:00:00Z`; the format is not validated by this type.
    pub created_at: String,
    /// Last-update timestamp string; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub updated_at: Option<String>,
    /// Completion timestamp string; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub completed_at: Option<String>,
}
56
/// Lifecycle status for an entire fleet run.
///
/// Variants are written to and read from the wire in `snake_case`
/// (for example `Cancelled` is `"cancelled"`).
// NOTE(review): the allowed transitions between these states are not encoded
// in this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FleetRunStatus {
    Pending,
    Queued,
    Running,
    Paused,
    Completed,
    Failed,
    Cancelled,
}
69
70/// Specification of a single unit of work within a run.
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct FleetTaskSpec {
73    pub id: String,
74    pub name: String,
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub description: Option<String>,
77    #[serde(skip_serializing_if = "Option::is_none")]
78    pub objective: Option<String>,
79    pub instructions: String,
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub worker: Option<FleetTaskWorkerProfile>,
82    #[serde(skip_serializing_if = "Option::is_none")]
83    pub workspace: Option<FleetWorkspaceRequirements>,
84    #[serde(default)]
85    #[serde(skip_serializing_if = "Vec::is_empty")]
86    pub input_files: Vec<PathBuf>,
87    #[serde(default)]
88    #[serde(skip_serializing_if = "Vec::is_empty")]
89    pub context: Vec<String>,
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub budget: Option<FleetTaskBudget>,
92    #[serde(default)]
93    #[serde(skip_serializing_if = "Vec::is_empty")]
94    pub tags: Vec<String>,
95    #[serde(default)]
96    pub expected_artifacts: Vec<FleetArtifactKind>,
97    #[serde(skip_serializing_if = "Option::is_none")]
98    pub scorer: Option<FleetScorerSpec>,
99    #[serde(skip_serializing_if = "Option::is_none")]
100    pub retry_policy: Option<FleetRetryPolicy>,
101    #[serde(skip_serializing_if = "Option::is_none")]
102    pub alert_policy: Option<FleetAlertPolicy>,
103    #[serde(default)]
104    pub timeout_seconds: Option<u64>,
105    #[serde(default)]
106    pub metadata: BTreeMap<String, Value>,
107}
108
109/// Worker role and tool expectations for a task.
110#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
111pub struct FleetTaskWorkerProfile {
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub role: Option<String>,
114    #[serde(skip_serializing_if = "Option::is_none")]
115    pub tool_profile: Option<String>,
116    #[serde(default)]
117    #[serde(skip_serializing_if = "Vec::is_empty")]
118    pub tools: Vec<String>,
119    #[serde(default)]
120    #[serde(skip_serializing_if = "Vec::is_empty")]
121    pub capabilities: Vec<String>,
122}
123
124/// Workspace and environment constraints needed before a task starts.
125#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
126pub struct FleetWorkspaceRequirements {
127    #[serde(skip_serializing_if = "Option::is_none")]
128    pub root: Option<PathBuf>,
129    #[serde(default)]
130    #[serde(skip_serializing_if = "Vec::is_empty")]
131    pub required_files: Vec<PathBuf>,
132    #[serde(default)]
133    #[serde(skip_serializing_if = "Vec::is_empty")]
134    pub writable_paths: Vec<PathBuf>,
135    #[serde(skip_serializing_if = "Option::is_none")]
136    pub environment: Option<FleetEnvironmentRequirements>,
137}
138
139/// Environment variables a task requires or may pass through to workers.
140#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
141pub struct FleetEnvironmentRequirements {
142    #[serde(default)]
143    #[serde(skip_serializing_if = "Vec::is_empty")]
144    pub required: Vec<String>,
145    #[serde(default)]
146    #[serde(skip_serializing_if = "Vec::is_empty")]
147    pub allowlist: Vec<String>,
148}
149
/// Budget limits for a task.
///
/// Every limit is optional; an unset limit is left out of the serialized form.
// NOTE(review): enforcement of these limits is not part of this file.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct FleetTaskBudget {
    /// Optional cap on tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u64>,
    /// Optional cap on the number of tool calls.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tool_calls: Option<u32>,
    /// Optional cap on seconds.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_seconds: Option<u64>,
}
160
/// Reference to an artifact produced or consumed by a task.
///
/// Holds the artifact's kind and path plus optional descriptive metadata; the
/// artifact contents themselves are not part of this type.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FleetArtifactRef {
    /// Category of the artifact.
    pub kind: FleetArtifactKind,
    /// Path of the artifact.
    pub path: PathBuf,
    /// Optional checksum string; left out of serialized output when `None`.
    // NOTE(review): the checksum algorithm/encoding is not specified here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub checksum: Option<String>,
    /// Optional MIME type; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mime_type: Option<String>,
    /// Optional size in bytes. It has no skip rule, so `None` is serialized
    /// as `null`.
    #[serde(default)]
    pub size_bytes: Option<u64>,
}
173
/// Category of artifact a task may produce or consume.
///
/// Six kinds are built in; any other wire string is preserved verbatim in
/// [`FleetArtifactKind::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FleetArtifactKind {
    Log,
    Patch,
    TestResult,
    Report,
    Checkpoint,
    Receipt,
    /// A kind not known to this version; carries the raw wire string.
    Other(String),
}

impl FleetArtifactKind {
    /// Returns the string used for this kind on the wire.
    ///
    /// Built-in kinds map to fixed `snake_case` names; `Other` yields its
    /// stored string unchanged.
    fn as_wire_str(&self) -> &str {
        match self {
            Self::Other(custom) => custom.as_str(),
            Self::Receipt => "receipt",
            Self::Checkpoint => "checkpoint",
            Self::Report => "report",
            Self::TestResult => "test_result",
            Self::Patch => "patch",
            Self::Log => "log",
        }
    }

    /// Parses a wire string into a kind; this never fails.
    ///
    /// Matching is exact and case-sensitive. Strings that do not match a
    /// built-in name become `Other`, so `Other("log")` built by hand would
    /// come back as `Log` after a serialize/deserialize round trip.
    fn from_wire_str(value: &str) -> Self {
        match value {
            "receipt" => Self::Receipt,
            "checkpoint" => Self::Checkpoint,
            "report" => Self::Report,
            "test_result" => Self::TestResult,
            "patch" => Self::Patch,
            "log" => Self::Log,
            unknown => Self::Other(unknown.to_owned()),
        }
    }
}
211
212impl Serialize for FleetArtifactKind {
213    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
214    where
215        S: Serializer,
216    {
217        serializer.serialize_str(self.as_wire_str())
218    }
219}
220
221impl<'de> Deserialize<'de> for FleetArtifactKind {
222    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
223    where
224        D: Deserializer<'de>,
225    {
226        let value = String::deserialize(deserializer)?;
227        Ok(Self::from_wire_str(&value))
228    }
229}
230
/// Scoring rule used to verify a task result.
///
/// Serialized as an internally tagged object: the `kind` key holds the
/// `snake_case` variant name (for example `"file_exists"`) and the variant's
/// fields sit beside it.
// NOTE(review): how each rule is evaluated is not defined in this file; the
// per-variant notes below describe the carried data only.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum FleetScorerSpec {
    /// Carries no data.
    ExitCode,
    /// Carries the path of a file.
    FileExists {
        path: PathBuf,
    },
    /// Carries a file path and a pattern string.
    RegexMatch {
        path: PathBuf,
        pattern: String,
    },
    /// Carries a file path and an expression string.
    JsonPath {
        path: PathBuf,
        expression: String,
    },
    /// Carries a command and its arguments; `args` is empty when absent.
    Command {
        command: String,
        #[serde(default)]
        args: Vec<String>,
    },
    /// Carries a prompt string.
    CodeWhaleVerifierPrompt {
        prompt: String,
    },
    /// Carries no data.
    Manual,
}
257
258/// Worker specification.
259#[derive(Debug, Clone, Serialize, Deserialize)]
260pub struct FleetWorkerSpec {
261    pub id: String,
262    pub name: String,
263    pub host: FleetHostSpec,
264    #[serde(default)]
265    #[serde(skip_serializing_if = "Option::is_none")]
266    pub trust_level: Option<FleetTrustLevel>,
267    #[serde(default)]
268    pub labels: BTreeMap<String, String>,
269    #[serde(default)]
270    pub capabilities: Vec<String>,
271    #[serde(skip_serializing_if = "Option::is_none")]
272    pub max_concurrent_tasks: Option<usize>,
273}
274
275/// Host on which a worker runs.
276#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
277#[serde(tag = "kind", rename_all = "snake_case")]
278pub enum FleetHostSpec {
279    Local,
280    Ssh {
281        host: String,
282        #[serde(skip_serializing_if = "Option::is_none")]
283        port: Option<u16>,
284        #[serde(skip_serializing_if = "Option::is_none")]
285        user: Option<String>,
286        #[serde(skip_serializing_if = "Option::is_none")]
287        identity: Option<PathBuf>,
288        /// Known hosts file for host-key verification.
289        #[serde(skip_serializing_if = "Option::is_none")]
290        known_hosts: Option<PathBuf>,
291        /// Expected host key fingerprint (SHA256:...) for key pinning.
292        /// When set, the connection is only trusted if the server's
293        /// host key matches this fingerprint exactly.
294        #[serde(skip_serializing_if = "Option::is_none")]
295        host_key_fingerprint: Option<String>,
296        #[serde(skip_serializing_if = "Option::is_none")]
297        working_directory: Option<PathBuf>,
298        #[serde(default)]
299        #[serde(skip_serializing_if = "Vec::is_empty")]
300        env_allowlist: Vec<String>,
301        #[serde(skip_serializing_if = "Option::is_none")]
302        codewhale_binary: Option<String>,
303    },
304    #[serde(alias = "container")]
305    #[serde(alias = "Container")]
306    Docker {
307        image: String,
308        #[serde(default)]
309        args: Vec<String>,
310    },
311}
312
313// ── Security and trust types ────────────────────────────────────────────────
314
/// Trust classification assigned to a worker host.
///
/// The trust level determines what a worker is allowed to do and what
/// secrets it may access. The default for new workers is [`FleetTrustLevel::Sandbox`];
/// operators must explicitly raise trust for SSH or container workers.
///
/// Levels are ordered by their discriminant (`Sandbox < Local <
/// RemoteVerified < Operator`) through the derived `Ord`, and use
/// `snake_case` names on the wire.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Default)]
#[serde(rename_all = "snake_case")]
pub enum FleetTrustLevel {
    /// Fully isolated: no network, no secrets, no writes outside `.codewhale/fleet/`.
    /// Suitable for untrusted code review, community PR checks, or third-party tool runs.
    #[default]
    Sandbox = 0,
    /// Local-only worker with access to the workspace and configured secrets.
    /// Default for local workers. May read repo files but writes are gated.
    // NOTE(review): `may_write_workspace()` returns `true` for `Local`, so any
    // "gating" of writes must happen elsewhere — confirm the intended meaning.
    Local = 1,
    /// Worker on a known remote host with verified identity and a bounded
    /// set of explicitly granted capabilities. Requires SSH host-key
    /// verification or equivalent attestation.
    ///
    /// Also accepted on input as `remote-verified` or `remoteVerified`.
    #[serde(alias = "remote-verified", alias = "remoteVerified")]
    RemoteVerified = 2,
    /// Fully trusted worker (e.g. operator's own machine, CI runner).
    /// Has access to all configured secrets and may perform any action the
    /// operator can. Reserved for dogfood smoke and operator-owned machines.
    Operator = 3,
}
340
341impl FleetTrustLevel {
342    /// Whether this trust level is allowed to access provider secrets.
343    #[must_use]
344    pub fn may_access_secrets(&self) -> bool {
345        matches!(self, Self::Operator | Self::RemoteVerified | Self::Local)
346    }
347
348    /// Whether this trust level is allowed to write outside `.codewhale/fleet/`.
349    #[must_use]
350    pub fn may_write_workspace(&self) -> bool {
351        matches!(self, Self::Operator | Self::Local)
352    }
353
354    /// Whether this trust level is allowed network access.
355    #[must_use]
356    pub fn may_access_network(&self) -> bool {
357        matches!(self, Self::Operator | Self::RemoteVerified | Self::Local)
358    }
359}
360
361/// Security policy applied to a fleet run.
362///
363/// A policy defines the default trust level for workers, which secrets
364/// may be resolved, and what capabilities are granted. When a run has no
365/// explicit policy, workers inherit conservative defaults.
366#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
367pub struct FleetSecurityPolicy {
368    /// Default trust level for workers that don't declare one explicitly.
369    #[serde(default)]
370    pub default_trust_level: FleetTrustLevel,
371    /// Secret refs that workers may resolve. An empty list means no secrets
372    /// are available. Each entry is a key name, not a value.
373    #[serde(default)]
374    #[serde(skip_serializing_if = "Vec::is_empty")]
375    pub allowed_secrets: Vec<FleetSecretRef>,
376    /// Capability grants for workers in this run.
377    #[serde(default)]
378    #[serde(skip_serializing_if = "Vec::is_empty")]
379    pub capability_grants: Vec<FleetCapabilityGrant>,
380    /// Maximum trust level any worker in this run may have, even if the
381    /// worker spec requests higher. Defaults to Operator (no ceiling).
382    #[serde(default = "default_max_trust_level")]
383    pub max_trust_level: FleetTrustLevel,
384    /// Require identity verification for remote workers. When true, SSH
385    /// workers must pass host-key verification before being trusted at
386    /// RemoteVerified level; unverified remotes stay at Sandbox.
387    #[serde(default)]
388    pub require_identity_verification: bool,
389    /// Allow conservative parallel execution of read-only tools (#2983).
390    /// When true, workers may batch independent read-only tool calls
391    /// (reads, searches, greps) into concurrent turns. Disabled by default
392    /// to avoid overwhelming providers or hitting rate limits.
393    #[serde(default)]
394    pub allow_parallel_reads: bool,
395}
396
/// Serde default for `FleetSecurityPolicy::max_trust_level`: the highest
/// level, `Operator`, which amounts to no ceiling.
fn default_max_trust_level() -> FleetTrustLevel {
    FleetTrustLevel::Operator
}
400
401impl Default for FleetSecurityPolicy {
402    fn default() -> Self {
403        Self {
404            default_trust_level: FleetTrustLevel::Sandbox,
405            allowed_secrets: Vec::new(),
406            capability_grants: Vec::new(),
407            max_trust_level: FleetTrustLevel::Operator,
408            require_identity_verification: false,
409            allow_parallel_reads: false,
410        }
411    }
412}
413
/// A reference to a secret that should be resolved at runtime, never
/// serialized as a plaintext value.
///
/// Secret refs appear in task specs, alert configs, and worker definitions.
/// The actual secret value is resolved by the fleet manager from the
/// secrets backend (OS keyring, environment, or file store) just before
/// the worker starts.
///
/// Serialization always writes an object (`key` plus `source` when set).
/// `Deserialize` is implemented by hand in this file and additionally accepts
/// a bare string as the key; it rejects keys that are empty or whitespace-only.
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct FleetSecretRef {
    /// The secret key name (e.g. `"CODEWHALE_API_KEY"`, `"GH_TOKEN"`).
    pub key: String,
    /// Optional source hint for resolution order.
    /// - `"env"` — resolve from environment variable
    /// - `"keyring"` — resolve from OS keyring
    /// - `"file"` — resolve from `~/.codewhale/secrets/`
    /// - absent / null — try all sources in default order
    // NOTE(review): the hint is a free-form string; this type does not
    // restrict it to the values listed above.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
}
433
434impl FleetSecretRef {
435    /// Create a secret ref from a key name with default resolution.
436    #[must_use]
437    pub fn new(key: impl Into<String>) -> Self {
438        Self {
439            key: key.into(),
440            source: None,
441        }
442    }
443
444    /// Create a secret ref with an explicit source.
445    #[must_use]
446    pub fn with_source(key: impl Into<String>, source: impl Into<String>) -> Self {
447        Self {
448            key: key.into(),
449            source: Some(source.into()),
450        }
451    }
452
453    /// Redacted display form for logging. Shows the key name and source
454    /// but never the resolved value.
455    #[must_use]
456    pub fn redacted(&self) -> String {
457        match &self.source {
458            Some(src) => format!("<secret:{}.{}>", src, self.key),
459            None => format!("<secret:{}>", self.key),
460        }
461    }
462}
463
464impl std::fmt::Display for FleetSecretRef {
465    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
466        write!(f, "{}", self.redacted())
467    }
468}
469
470impl From<&str> for FleetSecretRef {
471    fn from(key: &str) -> Self {
472        Self::new(key)
473    }
474}
475
476impl From<String> for FleetSecretRef {
477    fn from(key: String) -> Self {
478        Self::new(key)
479    }
480}
481
482impl<'de> Deserialize<'de> for FleetSecretRef {
483    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
484    where
485        D: Deserializer<'de>,
486    {
487        #[derive(Deserialize)]
488        #[serde(untagged)]
489        enum SecretRefWire {
490            Key(String),
491            Structured {
492                key: String,
493                #[serde(default)]
494                source: Option<String>,
495            },
496        }
497
498        match SecretRefWire::deserialize(deserializer)? {
499            SecretRefWire::Key(key) if !key.trim().is_empty() => Ok(FleetSecretRef::new(key)),
500            SecretRefWire::Key(_) => Err(de::Error::custom("secret ref key cannot be empty")),
501            SecretRefWire::Structured { key, source } if !key.trim().is_empty() => {
502                Ok(FleetSecretRef { key, source })
503            }
504            SecretRefWire::Structured { .. } => {
505                Err(de::Error::custom("secret ref key cannot be empty"))
506            }
507        }
508    }
509}
510
/// How a worker authenticates to the fleet manager.
///
/// Serialized as an internally tagged object: the `method` key holds the
/// `snake_case` variant name (`"none"`, `"ssh_key"`, `"token"`, `"mtls"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "method", rename_all = "snake_case")]
pub enum FleetWorkerAuth {
    /// No authentication (local workers share the same uid).
    None,
    /// SSH key-based authentication with host-key verification.
    SshKey {
        /// Path to the SSH identity file.
        // NOTE(review): an earlier comment said this may be given in JSON as
        // a secret-ref object (`{"key": "...", "source": "file"}`), but the
        // field is typed `PathBuf`, so the derived deserializer expects a
        // plain path string here. Confirm which form is intended.
        identity: PathBuf,
        /// Known hosts file for host-key verification.
        #[serde(skip_serializing_if = "Option::is_none")]
        known_hosts: Option<PathBuf>,
        /// Expected host key fingerprint for pinning.
        #[serde(skip_serializing_if = "Option::is_none")]
        host_key_fingerprint: Option<String>,
        /// SSH user for the connection.
        #[serde(skip_serializing_if = "Option::is_none")]
        user: Option<String>,
    },
    /// Token-based authentication for remote workers behind a fleet proxy.
    Token {
        /// Reference to the token secret.
        token_ref: FleetSecretRef,
    },
    /// mTLS certificate-based authentication.
    Mtls {
        /// Path to the client certificate.
        cert_path: PathBuf,
        /// Reference to the private key secret.
        key_ref: FleetSecretRef,
    },
}
545
/// A capability grant that explicitly authorizes a worker to perform
/// a specific class of action.
///
/// By default, new workers get no grants (least privilege). Grants are
/// additive: a worker's effective capabilities are the union of its
/// trust-level defaults plus any explicit grants.
// NOTE(review): the union described above is computed outside this file;
// this type only carries the grant data.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FleetCapabilityGrant {
    /// The capability being granted (e.g. `"network"`, `"git-push"`,
    /// `"provider-secrets"`, `"release"`). Stored as a free-form string.
    pub capability: String,
    /// Optional scope limiting the grant (e.g. `"github.com"` for network,
    /// `"crates/tui/**"` for file writes). Left out of serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scope: Option<String>,
    /// Optional justification for the grant (audit trail). Left out of
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
}
565
/// Runtime status of a worker.
///
/// Variants use `snake_case` names on the wire (for example `"unhealthy"`).
// NOTE(review): the conditions that move a worker between these states are
// not defined in this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FleetWorkerStatus {
    Unknown,
    Online,
    Busy,
    Offline,
    Unhealthy,
    Draining,
    Retired,
}
578
/// Durable inbox entry: a task waiting to be leased to a worker.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetInboxEntry {
    /// Run the queued task belongs to.
    pub run_id: FleetRunId,
    /// Identifier of the queued task.
    pub task_id: String,
    /// Scheduling priority.
    // NOTE(review): whether larger or smaller values run first is not
    // specified here.
    pub priority: i32,
    /// Timestamp string recording when the entry was enqueued.
    pub enqueued_at: String,
    /// Optional lease deadline timestamp; `None` when absent on input and
    /// serialized as `null` when unset.
    #[serde(default)]
    pub lease_deadline: Option<String>,
    /// Attempt counter; `0` when absent on input.
    #[serde(default)]
    pub attempts: u32,
}
591
592/// Worker event envelope.
593#[derive(Debug, Clone, Serialize, Deserialize)]
594pub struct FleetWorkerEvent {
595    pub seq: u64,
596    pub run_id: FleetRunId,
597    pub worker_id: String,
598    pub task_id: String,
599    pub timestamp: String,
600    #[serde(flatten)]
601    pub payload: FleetWorkerEventPayload,
602    #[serde(default)]
603    #[serde(skip_serializing_if = "BTreeMap::is_empty")]
604    pub extra: BTreeMap<String, Value>,
605}
606
607/// Union of all worker event payloads.
608#[derive(Debug, Clone, Serialize, Deserialize)]
609#[serde(tag = "state", rename_all = "snake_case")]
610pub enum FleetWorkerEventPayload {
611    Queued,
612    Leased {
613        #[serde(skip_serializing_if = "Option::is_none")]
614        lease_expires_at: Option<String>,
615    },
616    Starting,
617    Running,
618    ModelWait {
619        #[serde(skip_serializing_if = "Option::is_none")]
620        model: Option<String>,
621    },
622    RunningTool {
623        tool: String,
624        #[serde(skip_serializing_if = "Option::is_none")]
625        call_id: Option<String>,
626    },
627    Heartbeat {
628        #[serde(default)]
629        #[serde(skip_serializing_if = "Option::is_none")]
630        cpu_percent: Option<f32>,
631        #[serde(default)]
632        #[serde(skip_serializing_if = "Option::is_none")]
633        memory_mb: Option<u64>,
634    },
635    Artifact(FleetArtifactRef),
636    Completed {
637        #[serde(default)]
638        #[serde(skip_serializing_if = "Option::is_none")]
639        exit_code: Option<i32>,
640        #[serde(skip_serializing_if = "Option::is_none")]
641        summary: Option<String>,
642    },
643    Failed {
644        reason: String,
645        #[serde(default)]
646        recoverable: bool,
647    },
648    Cancelled {
649        #[serde(skip_serializing_if = "Option::is_none")]
650        cancelled_by: Option<String>,
651    },
652    Interrupted {
653        #[serde(skip_serializing_if = "Option::is_none")]
654        signal: Option<String>,
655    },
656    Stale {
657        #[serde(skip_serializing_if = "Option::is_none")]
658        last_heartbeat_at: Option<String>,
659    },
660    Restarted {
661        #[serde(default)]
662        restart_count: u32,
663    },
664    Escalated {
665        channel: String,
666        #[serde(skip_serializing_if = "Option::is_none")]
667        alert_id: Option<String>,
668    },
669}
670
/// Retry policy for a task or worker.
///
/// Each field missing on input takes the matching value from
/// `FleetRetryPolicy::default()` (3 attempts, 5 s initial backoff, 300 s
/// maximum backoff, multiplier 2).
// NOTE(review): the backoff computation itself is not part of this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FleetRetryPolicy {
    /// Maximum number of attempts.
    #[serde(default = "default_retry_max_attempts")]
    pub max_attempts: u32,
    /// Initial backoff, in seconds.
    #[serde(default = "default_retry_initial_backoff_seconds")]
    pub initial_backoff_seconds: u64,
    /// Maximum backoff, in seconds.
    #[serde(default = "default_retry_max_backoff_seconds")]
    pub max_backoff_seconds: u64,
    /// Integer backoff multiplier.
    #[serde(default = "default_retry_backoff_multiplier")]
    pub backoff_multiplier: u32,
}
683
impl Default for FleetRetryPolicy {
    /// Returns the baseline policy: 3 attempts, 5 s initial backoff, 300 s
    /// maximum backoff, multiplier 2. The serde per-field default helpers in
    /// this file read their values from here.
    fn default() -> Self {
        Self {
            max_attempts: 3,
            initial_backoff_seconds: 5,
            max_backoff_seconds: 300,
            backoff_multiplier: 2,
        }
    }
}
694
695fn default_retry_max_attempts() -> u32 {
696    FleetRetryPolicy::default().max_attempts
697}
698
699fn default_retry_initial_backoff_seconds() -> u64 {
700    FleetRetryPolicy::default().initial_backoff_seconds
701}
702
703fn default_retry_max_backoff_seconds() -> u64 {
704    FleetRetryPolicy::default().max_backoff_seconds
705}
706
707fn default_retry_backoff_multiplier() -> u32 {
708    FleetRetryPolicy::default().backoff_multiplier
709}
710
711/// Alert/escalation policy attached to a task or run.
712#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
713pub struct FleetAlertPolicy {
714    #[serde(default)]
715    #[serde(skip_serializing_if = "Vec::is_empty")]
716    pub events: Vec<FleetAlertEventClass>,
717    #[serde(default)]
718    pub channels: Vec<FleetAlertChannel>,
719    #[serde(default)]
720    pub after_attempts: Option<u32>,
721    #[serde(default)]
722    pub after_minutes_stale: Option<u64>,
723}
724
/// Class of event that an alert policy can list in `FleetAlertPolicy::events`.
///
/// Variants use `snake_case` names on the wire (for example
/// `"restart_exhausted"`).
// NOTE(review): the exact condition that raises each class is defined by the
// alerting code, not by this file.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "snake_case")]
pub enum FleetAlertEventClass {
    Stale,
    RestartExhausted,
    NeedsHuman,
    BudgetExceeded,
    VerifierFailed,
    RunCompleted,
}
735
736#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
737#[serde(tag = "kind", rename_all = "snake_case")]
738pub enum FleetAlertChannel {
739    Slack {
740        /// Webhook URL, resolved from a secret ref or inline.
741        #[serde(flatten)]
742        webhook: FleetAlertEndpoint,
743    },
744    Webhook {
745        #[serde(flatten)]
746        endpoint: FleetAlertEndpoint,
747    },
748    #[serde(alias = "pager_duty")]
749    #[serde(alias = "pagerduty")]
750    PagerDuty {
751        routing_key: String,
752        severity: String,
753    },
754}
755
/// An alert channel endpoint, supporting both inline URLs and secret refs.
///
/// For Slack and generic webhook channels, the URL may be provided directly
/// or as a secret reference resolved at send time. When both `url` and
/// `url_ref` are present, `url_ref` takes precedence after resolution.
///
/// All three fields are optional, so a value with neither `url` nor
/// `url_ref` can be constructed and deserialized; this type does not reject it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FleetAlertEndpoint {
    /// Inline URL (plaintext; only for non-sensitive endpoints).
    ///
    /// Also accepted on input as `webhook_url` or `endpoint_url`; always
    /// written as `url`, and left out when `None`.
    #[serde(
        alias = "webhook_url",
        alias = "endpoint_url",
        skip_serializing_if = "Option::is_none"
    )]
    pub url: Option<String>,
    /// Reference to a secret containing the webhook URL.
    ///
    /// Also accepted on input as `webhook_url_ref`, `webhook_ref` or
    /// `url_secret_ref`; always written as `url_ref`, and left out when `None`.
    #[serde(
        alias = "webhook_url_ref",
        alias = "webhook_ref",
        alias = "url_secret_ref",
        skip_serializing_if = "Option::is_none"
    )]
    pub url_ref: Option<FleetSecretRef>,
    /// Optional HMAC secret for webhook payload signing, as a secret ref.
    ///
    /// Also accepted on input as `secret`, `webhook_secret` or
    /// `signing_secret`. The value is parsed as a secret ref (a key name or
    /// `{key, source}` object), not as the secret itself.
    #[serde(
        alias = "secret",
        alias = "webhook_secret",
        alias = "signing_secret",
        skip_serializing_if = "Option::is_none"
    )]
    pub secret_ref: Option<FleetSecretRef>,
}
787
788impl FleetAlertEndpoint {
789    /// Create an inline URL endpoint (for non-sensitive use).
790    #[must_use]
791    pub fn inline(url: impl Into<String>) -> Self {
792        Self {
793            url: Some(url.into()),
794            url_ref: None,
795            secret_ref: None,
796        }
797    }
798
799    /// Create a secret-backed URL endpoint.
800    #[must_use]
801    pub fn from_secret(url_ref: FleetSecretRef) -> Self {
802        Self {
803            url: None,
804            url_ref: Some(url_ref),
805            secret_ref: None,
806        }
807    }
808
809    /// Redacted display form for logging.
810    #[must_use]
811    pub fn redacted(&self) -> String {
812        self.url_ref
813            .as_ref()
814            .map_or_else(|| "<inline-url>".to_string(), |r| r.redacted())
815    }
816}
817
/// Receipt produced when a task completes verification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetReceipt {
    /// Run the task belongs to.
    pub run_id: FleetRunId,
    /// Task the receipt is for.
    pub task_id: String,
    /// Worker recorded on the receipt.
    pub worker_id: String,
    /// Completion timestamp string.
    pub completed_at: String,
    /// Outcome of the task.
    pub result: FleetTaskResult,
    /// Optional failure category; left out of serialized output when `None`.
    // NOTE(review): presumably only set for non-passing results — confirm
    // against the code that builds receipts.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_kind: Option<FleetTaskFailureKind>,
    /// Artifact references attached to the receipt; empty when absent.
    #[serde(default)]
    pub artifacts: Vec<FleetArtifactRef>,
    /// Optional score; `None` when absent and serialized as `null` when unset.
    #[serde(default)]
    pub score: Option<FleetScore>,
}
833
/// Outcome recorded in a `FleetReceipt`.
///
/// Variants use `snake_case` names on the wire (for example `"timeout"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FleetTaskResult {
    Pass,
    Partial,
    Fail,
    Skip,
    Timeout,
}
843
/// Source category for a failed task receipt.
///
/// Variants use `snake_case` names on the wire (`"transport"`, `"task"`,
/// `"verifier"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FleetTaskFailureKind {
    Transport,
    Task,
    Verifier,
}
852
/// Numeric score attached to a `FleetReceipt`.
///
/// Derives `PartialEq` but not `Eq`, since the values are floating point.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FleetScore {
    /// The score value.
    pub value: f64,
    /// Optional maximum score; left out of serialized output when `None`.
    // NOTE(review): nothing here enforces `value <= max`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max: Option<f64>,
    /// Optional free-form notes; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
861
862#[cfg(test)]
863mod tests {
864    use super::*;
865
    // Serializes a populated `FleetRun` (one task, no workers) to JSON, parses
    // it back, and checks that the id, status, task count, worker role and
    // required workspace files survive the round trip.
    #[test]
    fn fleet_run_round_trip() {
        let run = FleetRun {
            id: FleetRunId::from("run-001"),
            name: "dogfood smoke".to_string(),
            status: FleetRunStatus::Running,
            task_specs: vec![FleetTaskSpec {
                id: "task-1".to_string(),
                name: "lint".to_string(),
                description: None,
                objective: Some("Keep the workspace lint-clean".to_string()),
                instructions: "run cargo clippy".to_string(),
                worker: Some(FleetTaskWorkerProfile {
                    role: Some("release-checker".to_string()),
                    tool_profile: Some("read-only".to_string()),
                    tools: vec!["cargo".to_string()],
                    capabilities: vec!["rust".to_string()],
                }),
                workspace: Some(FleetWorkspaceRequirements {
                    root: Some(PathBuf::from(".")),
                    required_files: vec![PathBuf::from("Cargo.toml")],
                    // Left empty on purpose: empty lists are skipped when
                    // serialized and default back to empty when parsed.
                    writable_paths: vec![],
                    environment: Some(FleetEnvironmentRequirements {
                        required: vec!["PATH".to_string()],
                        allowlist: vec!["RUST_LOG".to_string()],
                    }),
                }),
                input_files: vec![PathBuf::from("crates/tui/src/main.rs")],
                context: vec!["release gate".to_string()],
                budget: Some(FleetTaskBudget {
                    max_tokens: Some(8000),
                    max_tool_calls: Some(20),
                    max_seconds: Some(300),
                }),
                tags: vec!["release".to_string()],
                expected_artifacts: vec![FleetArtifactKind::Log],
                scorer: Some(FleetScorerSpec::ExitCode),
                retry_policy: Some(FleetRetryPolicy::default()),
                alert_policy: None,
                timeout_seconds: Some(300),
                metadata: BTreeMap::new(),
            }],
            worker_specs: vec![],
            labels: BTreeMap::new(),
            security_policy: None,
            created_at: "2026-06-12T17:00:00Z".to_string(),
            updated_at: None,
            completed_at: None,
        };
        // Round trip through the JSON text form.
        let json = serde_json::to_string(&run).unwrap();
        let back: FleetRun = serde_json::from_str(&json).unwrap();
        // `FleetRun` does not derive `PartialEq`, so compare selected fields.
        assert_eq!(back.id, run.id);
        assert_eq!(back.status, FleetRunStatus::Running);
        assert_eq!(back.task_specs.len(), 1);
        assert_eq!(
            back.task_specs[0].worker.as_ref().unwrap().role.as_deref(),
            Some("release-checker")
        );
        assert_eq!(
            back.task_specs[0]
                .workspace
                .as_ref()
                .unwrap()
                .required_files,
            vec![PathBuf::from("Cargo.toml")]
        );
    }
933
934    #[test]
935    fn worker_event_lifecycle_round_trip() {
936        let events = vec![
937            FleetWorkerEvent {
938                seq: 1,
939                run_id: FleetRunId::from("run-002"),
940                worker_id: "worker-a".to_string(),
941                task_id: "task-1".to_string(),
942                timestamp: "2026-06-12T17:01:00Z".to_string(),
943                payload: FleetWorkerEventPayload::Queued,
944                extra: BTreeMap::new(),
945            },
946            FleetWorkerEvent {
947                seq: 2,
948                run_id: FleetRunId::from("run-002"),
949                worker_id: "worker-a".to_string(),
950                task_id: "task-1".to_string(),
951                timestamp: "2026-06-12T17:01:05Z".to_string(),
952                payload: FleetWorkerEventPayload::RunningTool {
953                    tool: "bash".to_string(),
954                    call_id: Some("call-1".to_string()),
955                },
956                extra: BTreeMap::new(),
957            },
958            FleetWorkerEvent {
959                seq: 3,
960                run_id: FleetRunId::from("run-002"),
961                worker_id: "worker-a".to_string(),
962                task_id: "task-1".to_string(),
963                timestamp: "2026-06-12T17:02:00Z".to_string(),
964                payload: FleetWorkerEventPayload::Completed {
965                    exit_code: Some(0),
966                    summary: Some("ok".to_string()),
967                },
968                extra: BTreeMap::new(),
969            },
970        ];
971        let json = serde_json::to_string(&events).unwrap();
972        let back: Vec<FleetWorkerEvent> = serde_json::from_str(&json).unwrap();
973        assert_eq!(back.len(), 3);
974        assert!(matches!(back[0].payload, FleetWorkerEventPayload::Queued));
975        assert!(matches!(
976            back[2].payload,
977            FleetWorkerEventPayload::Completed { .. }
978        ));
979    }
980
981    #[test]
982    fn alert_policy_round_trip() {
983        let policy = FleetAlertPolicy {
984            events: vec![FleetAlertEventClass::Stale],
985            channels: vec![FleetAlertChannel::Slack {
986                webhook: FleetAlertEndpoint::inline("https://hooks.slack.com/test"),
987            }],
988            after_attempts: Some(2),
989            after_minutes_stale: Some(10),
990        };
991        let json = serde_json::to_string(&policy).unwrap();
992        assert!(json.contains("\"events\":[\"stale\"]"));
993        assert!(json.contains("\"kind\":\"slack\""));
994        let back: FleetAlertPolicy = serde_json::from_str(&json).unwrap();
995        assert_eq!(back.events, vec![FleetAlertEventClass::Stale]);
996        assert_eq!(back.after_attempts, Some(2));
997    }
998
999    #[test]
1000    fn artifact_other_kind_round_trip() {
1001        let artifact = FleetArtifactRef {
1002            kind: FleetArtifactKind::Other("coverage.xml".to_string()),
1003            path: PathBuf::from("/tmp/coverage.xml"),
1004            checksum: Some("sha256:abc".to_string()),
1005            mime_type: Some("application/xml".to_string()),
1006            size_bytes: Some(1024),
1007        };
1008        let json = serde_json::to_string(&artifact).unwrap();
1009        let back: FleetArtifactRef = serde_json::from_str(&json).unwrap();
1010        assert_eq!(back.kind, artifact.kind);
1011        assert_eq!(back.size_bytes, Some(1024));
1012    }
1013
1014    #[test]
1015    fn ssh_host_spec_accepts_minimal_legacy_json() {
1016        let json = r#"{"kind":"ssh","host":"builder.example.test"}"#;
1017        let host: FleetHostSpec = serde_json::from_str(json).unwrap();
1018
1019        match host {
1020            FleetHostSpec::Ssh {
1021                host,
1022                port,
1023                user,
1024                identity,
1025                known_hosts,
1026                host_key_fingerprint,
1027                working_directory,
1028                env_allowlist,
1029                codewhale_binary,
1030            } => {
1031                assert_eq!(host, "builder.example.test");
1032                assert_eq!(port, None);
1033                assert_eq!(user, None);
1034                assert_eq!(identity, None);
1035                assert_eq!(known_hosts, None);
1036                assert_eq!(host_key_fingerprint, None);
1037                assert_eq!(working_directory, None);
1038                assert!(env_allowlist.is_empty());
1039                assert_eq!(codewhale_binary, None);
1040            }
1041            other => panic!("expected ssh host spec, got {other:?}"),
1042        }
1043    }
1044
1045    #[test]
1046    fn artifact_kind_uses_flat_string_json() {
1047        let known = serde_json::to_string(&FleetArtifactKind::TestResult).unwrap();
1048        assert_eq!(known, "\"test_result\"");
1049
1050        let custom =
1051            serde_json::to_string(&FleetArtifactKind::Other("coverage.xml".to_string())).unwrap();
1052        assert_eq!(custom, "\"coverage.xml\"");
1053
1054        let parsed: FleetArtifactKind = serde_json::from_str("\"coverage.xml\"").unwrap();
1055        assert_eq!(parsed, FleetArtifactKind::Other("coverage.xml".to_string()));
1056    }
1057
1058    #[test]
1059    fn retry_policy_missing_fields_use_nonzero_defaults() {
1060        let policy: FleetRetryPolicy = serde_json::from_value(serde_json::json!({})).unwrap();
1061        assert_eq!(policy, FleetRetryPolicy::default());
1062
1063        let policy: FleetRetryPolicy =
1064            serde_json::from_value(serde_json::json!({"max_attempts": 5})).unwrap();
1065        assert_eq!(policy.max_attempts, 5);
1066        assert_eq!(
1067            policy.initial_backoff_seconds,
1068            FleetRetryPolicy::default().initial_backoff_seconds
1069        );
1070        assert_eq!(
1071            policy.max_backoff_seconds,
1072            FleetRetryPolicy::default().max_backoff_seconds
1073        );
1074        assert_eq!(
1075            policy.backoff_multiplier,
1076            FleetRetryPolicy::default().backoff_multiplier
1077        );
1078    }
1079
1080    #[test]
1081    fn sparse_worker_events_omit_absent_optional_fields() {
1082        let heartbeat = FleetWorkerEventPayload::Heartbeat {
1083            cpu_percent: None,
1084            memory_mb: None,
1085        };
1086        let heartbeat_json = serde_json::to_value(&heartbeat).unwrap();
1087        assert_eq!(heartbeat_json, serde_json::json!({"state": "heartbeat"}));
1088
1089        let completed = FleetWorkerEventPayload::Completed {
1090            exit_code: None,
1091            summary: None,
1092        };
1093        let completed_json = serde_json::to_value(&completed).unwrap();
1094        assert_eq!(completed_json, serde_json::json!({"state": "completed"}));
1095    }
1096
1097    #[test]
1098    fn receipt_round_trip() {
1099        let receipt = FleetReceipt {
1100            run_id: FleetRunId::from("run-003"),
1101            task_id: "task-1".to_string(),
1102            worker_id: "worker-b".to_string(),
1103            completed_at: "2026-06-12T17:03:00Z".to_string(),
1104            result: FleetTaskResult::Pass,
1105            failure_kind: None,
1106            artifacts: vec![],
1107            score: Some(FleetScore {
1108                value: 0.95,
1109                max: Some(1.0),
1110                notes: None,
1111            }),
1112        };
1113        let json = serde_json::to_string(&receipt).unwrap();
1114        let back: FleetReceipt = serde_json::from_str(&json).unwrap();
1115        assert_eq!(back.result, FleetTaskResult::Pass);
1116        assert_eq!(back.score.as_ref().unwrap().value, 0.95);
1117    }
1118
1119    #[test]
1120    fn partial_receipt_records_failure_source_when_needed() {
1121        let receipt = FleetReceipt {
1122            run_id: FleetRunId::from("run-004"),
1123            task_id: "task-2".to_string(),
1124            worker_id: "worker-c".to_string(),
1125            completed_at: "2026-06-12T17:04:00Z".to_string(),
1126            result: FleetTaskResult::Partial,
1127            failure_kind: Some(FleetTaskFailureKind::Verifier),
1128            artifacts: vec![],
1129            score: Some(FleetScore {
1130                value: 0.5,
1131                max: Some(1.0),
1132                notes: Some("manual verification required".to_string()),
1133            }),
1134        };
1135
1136        let json = serde_json::to_string(&receipt).unwrap();
1137        assert!(json.contains("\"result\":\"partial\""));
1138        assert!(json.contains("\"failure_kind\":\"verifier\""));
1139        let back: FleetReceipt = serde_json::from_str(&json).unwrap();
1140        assert_eq!(back.result, FleetTaskResult::Partial);
1141        assert_eq!(back.failure_kind, Some(FleetTaskFailureKind::Verifier));
1142    }
1143
1144    #[test]
1145    fn ssh_host_spec_with_key_pinning_round_trip() {
1146        let spec = FleetHostSpec::Ssh {
1147            host: "builder.trusted.example.com".to_string(),
1148            port: Some(22),
1149            user: Some("codewhale".to_string()),
1150            identity: Some(PathBuf::from("~/.ssh/codewhale_fleet")),
1151            known_hosts: Some(PathBuf::from("~/.ssh/known_hosts")),
1152            host_key_fingerprint: Some("SHA256:aLGqZo1M6c...".to_string()),
1153            working_directory: Some(PathBuf::from("/srv/codewhale/work")),
1154            env_allowlist: vec!["CODEWHALE_PROFILE".to_string()],
1155            codewhale_binary: Some("/usr/local/bin/codewhale".to_string()),
1156        };
1157        let json = serde_json::to_string_pretty(&spec).unwrap();
1158        assert!(json.contains("\"known_hosts\""));
1159        assert!(json.contains("\"host_key_fingerprint\""));
1160        assert!(json.contains("SHA256:aLGqZo1M6c..."));
1161
1162        let back: FleetHostSpec = serde_json::from_str(&json).unwrap();
1163        match back {
1164            FleetHostSpec::Ssh {
1165                host,
1166                known_hosts,
1167                host_key_fingerprint,
1168                ..
1169            } => {
1170                assert_eq!(host, "builder.trusted.example.com");
1171                assert_eq!(known_hosts, Some(PathBuf::from("~/.ssh/known_hosts")));
1172                assert_eq!(
1173                    host_key_fingerprint,
1174                    Some("SHA256:aLGqZo1M6c...".to_string())
1175                );
1176            }
1177            other => panic!("expected ssh host spec, got {other:?}"),
1178        }
1179    }
1180
1181    #[test]
1182    fn secret_ref_redacted_never_exposes_value() {
1183        let ref_ = FleetSecretRef::new("DEEPSEEK_API_KEY");
1184        let redacted = ref_.redacted();
1185        assert!(redacted.contains("DEEPSEEK_API_KEY"));
1186        assert!(!redacted.contains("sk-"));
1187        assert!(redacted.contains("<secret:"));
1188
1189        let ref_ = FleetSecretRef::with_source("GH_TOKEN", "env");
1190        let redacted = ref_.redacted();
1191        assert!(redacted.contains("env.GH_TOKEN"));
1192        assert!(!redacted.contains("ghp_"));
1193    }
1194
1195    #[test]
1196    fn alert_endpoint_from_secret_round_trip() {
1197        let endpoint = FleetAlertEndpoint::from_secret(FleetSecretRef::new("SLACK_WEBHOOK"));
1198        let json = serde_json::to_string(&endpoint).unwrap();
1199        assert!(json.contains("SLACK_WEBHOOK"));
1200        assert!(!json.contains("hooks.slack.com"));
1201
1202        let back: FleetAlertEndpoint = serde_json::from_str(&json).unwrap();
1203        assert_eq!(back.url_ref.as_ref().unwrap().key, "SLACK_WEBHOOK");
1204        assert_eq!(back.url, None);
1205    }
1206
1207    #[test]
1208    fn secret_ref_accepts_legacy_string_wire_shape() {
1209        let ref_: FleetSecretRef = serde_json::from_str(r#""CODEWHALE_FLEET_TOKEN""#).unwrap();
1210        assert_eq!(ref_, FleetSecretRef::new("CODEWHALE_FLEET_TOKEN"));
1211
1212        let ref_: FleetSecretRef =
1213            serde_json::from_str(r#"{"key":"GH_TOKEN","source":"env"}"#).unwrap();
1214        assert_eq!(ref_, FleetSecretRef::with_source("GH_TOKEN", "env"));
1215    }
1216
1217    #[test]
1218    fn trust_level_accepts_hyphenated_remote_verified() {
1219        let trust: FleetTrustLevel = serde_json::from_str(r#""remote-verified""#).unwrap();
1220        assert_eq!(trust, FleetTrustLevel::RemoteVerified);
1221
1222        let canonical = serde_json::to_string(&trust).unwrap();
1223        assert_eq!(canonical, r#""remote_verified""#);
1224    }
1225
1226    #[test]
1227    fn alert_channel_accepts_legacy_webhook_fields() {
1228        let channel: FleetAlertChannel = serde_json::from_str(
1229            r#"{
1230                "kind": "slack",
1231                "webhook_url": "https://hooks.slack.com/test",
1232                "secret": "SLACK_SIGNING_SECRET"
1233            }"#,
1234        )
1235        .unwrap();
1236
1237        match channel {
1238            FleetAlertChannel::Slack { webhook } => {
1239                assert_eq!(webhook.url.as_deref(), Some("https://hooks.slack.com/test"));
1240                assert_eq!(
1241                    webhook.secret_ref,
1242                    Some(FleetSecretRef::new("SLACK_SIGNING_SECRET"))
1243                );
1244            }
1245            other => panic!("expected slack channel, got {other:?}"),
1246        }
1247    }
1248
1249    #[test]
1250    fn security_policy_defaults_are_conservative() {
1251        let policy = FleetSecurityPolicy::default();
1252        assert_eq!(policy.default_trust_level, FleetTrustLevel::Sandbox);
1253        assert!(policy.allowed_secrets.is_empty());
1254        assert!(policy.capability_grants.is_empty());
1255        assert_eq!(policy.max_trust_level, FleetTrustLevel::Operator);
1256        assert!(!policy.require_identity_verification);
1257    }
1258
1259    #[test]
1260    fn trust_level_ordinal_reflects_privilege() {
1261        assert!(FleetTrustLevel::Operator > FleetTrustLevel::RemoteVerified);
1262        assert!(FleetTrustLevel::RemoteVerified > FleetTrustLevel::Local);
1263        assert!(FleetTrustLevel::Local > FleetTrustLevel::Sandbox);
1264
1265        assert!(FleetTrustLevel::Operator.may_access_secrets());
1266        assert!(!FleetTrustLevel::Sandbox.may_access_secrets());
1267        assert!(!FleetTrustLevel::Sandbox.may_write_workspace());
1268        assert!(FleetTrustLevel::Operator.may_write_workspace());
1269    }
1270}