Skip to main content

zlayer_secrets/
worker_bootstrap.rs

1//! Worker-tier bootstrap tokens.
2//!
3//! Tokens are short-lived, cluster-CA-signed credentials a worker presents
4//! during gRPC `Register`. The cluster signer (Ed25519) already used for join
5//! tokens signs these too — reuse, don't duplicate identity material.
6//!
7//! Token format (postcard2-encoded, then URL-safe-base64 for the CLI flag):
8//!
9//! ```text
10//! WorkerBootstrapToken {
11//!     claims: WorkerBootstrapClaims {
12//!         domain_tag: "zlayer-worker-bootstrap-v1",
13//!         cluster_id: String,
14//!         jti: Uuid (as String),
15//!         issued_at_unix: i64,
16//!         expires_at_unix: i64,
17//!         max_uses: u32,
18//!         permitted_labels: Vec<(String, String)>,
19//!     },
20//!     signer_kid: String,
21//!     signature_b64: String,
22//! }
23//! ```
24//!
25//! The signed payload covers every claim (postcard2-encoded
26//! [`WorkerBootstrapClaims`]).
27//!
//! Usage counting is the caller's responsibility — typically a
//! `SecretsRaftOp` that records `jti → uses` in the FSM. Verification checks
//! the domain tag, the signer key id, expiry / issue-time skew, and the
//! signature — it never looks at usage. The caller then compares its own
//! usage count for the returned `jti` against `max_uses`.
32//!
33//! Multi-key (rotation/grace) verification: the caller is responsible for
34//! looking up the right signer by `token.signer_kid` via
35//! [`crate::load_signer_for_kid`] before calling
36//! [`verify_worker_bootstrap_token`]. This module verifies against a single
37//! [`ClusterSigner`] whose `key_id()` must equal the token's `signer_kid`.
38
39use base64::engine::general_purpose::URL_SAFE_NO_PAD;
40use base64::Engine as _;
41use ed25519_dalek::Verifier;
42use serde::{Deserialize, Serialize};
43
44use crate::{ClusterSigner, Result, SecretsError};
45
/// Domain-separation tag embedded in every set of signed claims, so that a
/// worker bootstrap token cannot be mistaken for any other Ed25519-signed
/// blob. Verification rejects claims whose `domain_tag` differs from this.
const DOMAIN_TAG: &str = "zlayer-worker-bootstrap-v1";
49
50/// Token claims (the signed portion).
51#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
52pub struct WorkerBootstrapClaims {
53    /// `domain_tag` — equals the module-internal domain constant. Reject if
54    /// mismatched.
55    pub domain_tag: String,
56    /// Cluster ID this token belongs to (random UUID issued at bootstrap).
57    pub cluster_id: String,
58    /// Unique token ID — used by the caller's usage-tracking layer.
59    pub jti: String,
60    /// Unix-seconds when the token was issued.
61    pub issued_at_unix: i64,
62    /// Unix-seconds when the token expires.
63    pub expires_at_unix: i64,
64    /// Maximum number of times this token may be redeemed. 0 means unlimited
65    /// (not recommended outside dev).
66    pub max_uses: u32,
67    /// Optional label whitelist — when non-empty, the worker's profile must
68    /// declare each of these labels (any extra labels are ignored).
69    #[serde(default)]
70    pub permitted_labels: Vec<(String, String)>,
71}
72
73/// Full signed token (claims + signer kid + signature).
74#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
75pub struct WorkerBootstrapToken {
76    pub claims: WorkerBootstrapClaims,
77    /// Key id of the signing key (matches [`ClusterSigner::key_id`]).
78    pub signer_kid: String,
79    /// Ed25519 signature over postcard2-encoded claims, base64-url-no-pad.
80    pub signature_b64: String,
81}
82
83impl WorkerBootstrapToken {
84    /// Encode the token as a single URL-safe-base64 string suitable for CLI
85    /// flags / files.
86    ///
87    /// # Errors
88    ///
89    /// Returns [`SecretsError::Encryption`] on postcard2 serialization
90    /// failure.
91    pub fn to_cli_string(&self) -> Result<String> {
92        let bytes = postcard2::to_vec(self)
93            .map_err(|e| SecretsError::Encryption(format!("encode worker token: {e}")))?;
94        Ok(URL_SAFE_NO_PAD.encode(bytes))
95    }
96
97    /// Decode a token previously produced by [`Self::to_cli_string`].
98    ///
99    /// # Errors
100    ///
101    /// Returns [`SecretsError::Encryption`] on base64 / postcard2 failure.
102    pub fn from_cli_string(s: &str) -> Result<Self> {
103        let bytes = URL_SAFE_NO_PAD
104            .decode(s)
105            .map_err(|e| SecretsError::Encryption(format!("decode worker token base64: {e}")))?;
106        postcard2::from_bytes(&bytes)
107            .map_err(|e| SecretsError::Encryption(format!("decode worker token postcard2: {e}")))
108    }
109}
110
111/// Issue a fresh bootstrap token signed by the supplied [`ClusterSigner`].
112///
113/// # Errors
114///
115/// Returns [`SecretsError::Encryption`] on encoding failure.
116pub fn issue_worker_bootstrap_token(
117    signer: &ClusterSigner,
118    cluster_id: impl Into<String>,
119    valid_for_secs: i64,
120    max_uses: u32,
121    permitted_labels: Vec<(String, String)>,
122) -> Result<WorkerBootstrapToken> {
123    let now = time::OffsetDateTime::now_utc().unix_timestamp();
124    let jti = uuid::Uuid::new_v4().to_string();
125
126    let claims = WorkerBootstrapClaims {
127        domain_tag: DOMAIN_TAG.into(),
128        cluster_id: cluster_id.into(),
129        jti,
130        issued_at_unix: now,
131        expires_at_unix: now + valid_for_secs,
132        max_uses,
133        permitted_labels,
134    };
135
136    let payload = postcard2::to_vec(&claims)
137        .map_err(|e| SecretsError::Encryption(format!("encode bootstrap claims: {e}")))?;
138
139    let sig_bytes = signer.sign(&payload);
140
141    Ok(WorkerBootstrapToken {
142        claims,
143        signer_kid: signer.key_id(),
144        signature_b64: URL_SAFE_NO_PAD.encode(sig_bytes),
145    })
146}
147
148/// Verify a token's signature, domain tag, and expiry. The caller is
149/// responsible for `max_uses` tracking (typically via the Raft FSM).
150///
151/// `signer` must be the [`ClusterSigner`] whose [`ClusterSigner::key_id`]
152/// equals `token.signer_kid` — for in-grace keys, the caller should look up
153/// the right signer via [`crate::load_signer_for_kid`] before calling this.
154///
155/// Returns the claims on success — caller checks `jti`/`max_uses` against
156/// the usage counter.
157///
158/// # Errors
159///
160/// Returns [`SecretsError::Encryption`] with a human-readable reason on any
161/// validation failure.
162pub fn verify_worker_bootstrap_token(
163    signer: &ClusterSigner,
164    token: &WorkerBootstrapToken,
165) -> Result<WorkerBootstrapClaims> {
166    if token.claims.domain_tag != DOMAIN_TAG {
167        return Err(SecretsError::Encryption(format!(
168            "wrong token domain: expected {DOMAIN_TAG}, got {}",
169            token.claims.domain_tag
170        )));
171    }
172
173    if signer.key_id() != token.signer_kid {
174        return Err(SecretsError::Encryption(format!(
175            "signer kid mismatch: signer has {}, token claims {}",
176            signer.key_id(),
177            token.signer_kid
178        )));
179    }
180
181    let now = time::OffsetDateTime::now_utc().unix_timestamp();
182    if now >= token.claims.expires_at_unix {
183        return Err(SecretsError::Encryption("token expired".into()));
184    }
185    if token.claims.issued_at_unix > now + 60 {
186        return Err(SecretsError::Encryption(
187            "token issued more than 60s in the future".into(),
188        ));
189    }
190
191    let sig_bytes = URL_SAFE_NO_PAD
192        .decode(&token.signature_b64)
193        .map_err(|e| SecretsError::Encryption(format!("decode token signature: {e}")))?;
194    let sig_array: [u8; 64] = sig_bytes
195        .as_slice()
196        .try_into()
197        .map_err(|_| SecretsError::Encryption("token signature wrong length".into()))?;
198    let signature = ed25519_dalek::Signature::from_bytes(&sig_array);
199
200    let payload = postcard2::to_vec(&token.claims)
201        .map_err(|e| SecretsError::Encryption(format!("re-encode claims: {e}")))?;
202    signer
203        .verifying_key()
204        .verify(&payload, &signature)
205        .map_err(|e| SecretsError::Encryption(format!("token signature invalid: {e}")))?;
206
207    Ok(token.claims.clone())
208}
209
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a signer backed by a fresh temp directory. The `TempDir` is
    /// returned so it stays alive for as long as the test holds it.
    async fn make_signer() -> (ClusterSigner, TempDir) {
        let dir = TempDir::new().expect("tempdir");
        let path = dir.path().join("cluster_signer.json");
        let signer = ClusterSigner::load_or_generate(&path)
            .await
            .expect("load_or_generate");
        (signer, dir)
    }

    #[tokio::test]
    async fn issue_and_verify_round_trip() {
        let (signer, _dir) = make_signer().await;
        let token = issue_worker_bootstrap_token(
            &signer,
            "cluster-abc",
            3600,
            1,
            vec![("region".into(), "us-east".into())],
        )
        .expect("issue");

        let s = token.to_cli_string().expect("encode");
        let parsed = WorkerBootstrapToken::from_cli_string(&s).expect("decode");
        assert_eq!(token, parsed);

        let claims = verify_worker_bootstrap_token(&signer, &parsed).expect("verify");
        assert_eq!(claims.cluster_id, "cluster-abc");
        assert_eq!(claims.max_uses, 1);
        assert_eq!(
            claims.permitted_labels,
            vec![("region".into(), "us-east".into())]
        );
    }

    #[tokio::test]
    async fn expired_token_rejected() {
        let (signer, _dir) = make_signer().await;
        let mut token = issue_worker_bootstrap_token(&signer, "c", 3600, 1, vec![]).expect("issue");
        token.claims.expires_at_unix = 0; // far past

        // Re-sign so it's a "valid signature on a stale claims" token (the
        // attacker case: tampering with expiry).
        let payload = postcard2::to_vec(&token.claims).unwrap();
        let sig = signer.sign(&payload);
        token.signer_kid = signer.key_id();
        token.signature_b64 = URL_SAFE_NO_PAD.encode(sig);

        let err = verify_worker_bootstrap_token(&signer, &token).unwrap_err();
        assert!(err.to_string().contains("expired"));
    }

    #[tokio::test]
    async fn tampered_signature_rejected() {
        let (signer, _dir) = make_signer().await;
        let token = issue_worker_bootstrap_token(&signer, "c", 3600, 1, vec![]).expect("issue");

        let mut bad = token.clone();
        bad.claims.cluster_id = "different-cluster".into();
        // Don't re-sign — the original signature is over the original claims,
        // so the modified token's payload won't verify.

        let err = verify_worker_bootstrap_token(&signer, &bad).unwrap_err();
        assert!(err.to_string().contains("signature invalid"));
    }

    #[tokio::test]
    async fn wrong_domain_tag_rejected() {
        let (signer, _dir) = make_signer().await;
        let mut token = issue_worker_bootstrap_token(&signer, "c", 3600, 1, vec![]).expect("issue");
        token.claims.domain_tag = "other-domain".into();
        let err = verify_worker_bootstrap_token(&signer, &token).unwrap_err();
        assert!(err.to_string().contains("domain"));
    }

    #[tokio::test]
    async fn signer_kid_mismatch_rejected() {
        let (signer, _dir) = make_signer().await;
        let mut token = issue_worker_bootstrap_token(&signer, "c", 3600, 1, vec![]).expect("issue");
        // Any kid other than the signer's own must be refused.
        token.signer_kid = format!("{}-other", signer.key_id());

        let err = verify_worker_bootstrap_token(&signer, &token).unwrap_err();
        assert!(err.to_string().contains("kid mismatch"));
    }

    #[tokio::test]
    async fn future_issued_token_rejected() {
        let (signer, _dir) = make_signer().await;
        let mut token = issue_worker_bootstrap_token(&signer, "c", 7200, 1, vec![]).expect("issue");
        // Well beyond the 60s allowance, yet still before expiry, so the
        // expiry check cannot be what rejects it.
        token.claims.issued_at_unix += 3600;

        let err = verify_worker_bootstrap_token(&signer, &token).unwrap_err();
        assert!(err.to_string().contains("future"));
    }

    #[tokio::test]
    async fn malformed_signature_rejected() {
        let (signer, _dir) = make_signer().await;
        let mut token = issue_worker_bootstrap_token(&signer, "c", 3600, 1, vec![]).expect("issue");

        // Not valid URL-safe base64 at all.
        token.signature_b64 = "not base64!".into();
        let err = verify_worker_bootstrap_token(&signer, &token).unwrap_err();
        assert!(err.to_string().contains("decode token signature"));

        // Valid base64, but not the 64 bytes an Ed25519 signature needs.
        token.signature_b64 = URL_SAFE_NO_PAD.encode([0u8; 16]);
        let err = verify_worker_bootstrap_token(&signer, &token).unwrap_err();
        assert!(err.to_string().contains("wrong length"));
    }

    #[test]
    fn garbage_cli_string_rejected() {
        let err = WorkerBootstrapToken::from_cli_string("not base64!").unwrap_err();
        assert!(err.to_string().contains("base64"));
    }
}
288}