Skip to main content

zlayer_secrets/
cluster_signer.rs

1//! Ed25519 cluster signer for signed join tokens.
2//!
3//! A `ClusterSigner` holds an Ed25519 keypair used by the cluster leader to
4//! sign join tokens (and verify them on the receiving end). The keypair is
5//! persisted to disk as a JSON keystore (Unix mode 0600) so the cluster
6//! identity survives daemon restarts and supports key rotation with grace
7//! periods.
8//!
9//! ## On-disk format (`version = 1`)
10//!
11//! The keystore is a JSON document like:
12//!
13//! ```json
14//! {
15//!   "version": 1,
16//!   "keys": [
17//!     {
18//!       "id": "abc12345",
19//!       "seed_b64": "...",
20//!       "created_at": "2026-05-14T17:55:00Z"
21//!     }
22//!   ],
23//!   "active": "abc12345",
24//!   "retired_grace_until": {}
25//! }
26//! ```
27//!
28//! - `version: u32` — file format version (1 for now). Future bumps can
29//!   migrate cleanly.
30//! - `keys` — every known signing keypair (active or in grace).
31//! - `active` — the kid of the currently-active key. Must match exactly one
32//!   entry in `keys`.
33//! - `retired_grace_until` — kids of retired keys mapped to the timestamp at
34//!   which their grace period expires. After that timestamp the entry is
35//!   eligible for pruning.
36//!
37//! ## Legacy migration
38//!
39//! Wave 1 persisted the seed as exactly 32 raw bytes. `load_or_generate`
40//! transparently detects that format (file exists, parses as 32 bytes, and
41//! is not valid keystore JSON), migrates it to the new JSON layout in place
42//! (atomic write via `{path}.tmp` then rename), and continues normally. The
43//! migration is idempotent: running `load_or_generate` twice on a freshly-
44//! migrated keystore performs no further writes.
45
46use std::collections::HashMap;
47use std::path::{Path, PathBuf};
48use std::sync::OnceLock;
49
50use base64::engine::general_purpose::URL_SAFE_NO_PAD;
51use base64::Engine as _;
52use chrono::{DateTime, Utc};
53use ed25519_dalek::{Signature, Signer, SigningKey, VerifyingKey};
54use rand::TryRngCore;
55use serde::{Deserialize, Serialize};
56use sha2::{Digest, Sha256};
57use tokio::fs;
58use tokio::sync::Mutex;
59
60use crate::SecretsError;
61
/// Length of the on-disk raw seed in bytes (Ed25519 `SigningKey` seed size).
///
/// This is also the exact file length that `ClusterSigner::read_keystore`
/// treats as a legacy raw-seed key file when the contents are not valid
/// keystore JSON.
const SIGNING_KEY_SEED_LEN: usize = 32;

/// Current on-disk keystore format version.
///
/// Written into `KeyStoreFile::version` whenever this module creates a fresh
/// keystore or migrates a legacy one.
pub(crate) const KEYSTORE_VERSION: u32 = 1;
67
68/// One entry in the keystore — a single Ed25519 signing key with metadata.
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub(crate) struct KeyEntry {
71    /// Short greppable identifier (first 8 hex chars of SHA-256(public key)).
72    pub(crate) id: String,
73    /// URL-safe no-pad base64 encoding of the 32-byte signing seed.
74    pub(crate) seed_b64: String,
75    /// When this key was generated.
76    pub(crate) created_at: DateTime<Utc>,
77}
78
/// The on-disk JSON keystore.
///
/// Serialized and deserialized with serde; the field names below are the
/// JSON keys of the document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct KeyStoreFile {
    /// Format version (currently `1`).
    pub(crate) version: u32,
    /// Every known signing keypair, active or in grace.
    pub(crate) keys: Vec<KeyEntry>,
    /// Kid of the currently-active signing key. Must match exactly one
    /// `keys[].id`.
    pub(crate) active: String,
    /// Kids of retired keys mapped to the timestamp at which their grace
    /// period expires.
    ///
    /// `#[serde(default)]` means a document that omits this field
    /// deserializes with an empty map.
    #[serde(default)]
    pub(crate) retired_grace_until: HashMap<String, DateTime<Utc>>,
}
94
/// Ed25519 keypair used to sign cluster join tokens.
///
/// Cloning is intentionally not implemented: each `ClusterSigner` owns its
/// signing material and should be passed by reference through the daemon
/// (typically `Arc<ClusterSigner>`).
pub struct ClusterSigner {
    // Private signing key; never exposed outside this type.
    signing: SigningKey,
    // Verifying key derived from `signing` at construction time and kept
    // alongside it for the public accessors.
    public: VerifyingKey,
}
104
105impl std::fmt::Debug for ClusterSigner {
106    /// Redacts the private key material. Only the short, public `key_id` is
107    /// printed so signing keys never leak via accidental `{:?}` formatting.
108    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109        f.debug_struct("ClusterSigner")
110            .field("key_id", &self.key_id())
111            .field("signing", &"<redacted>")
112            .finish()
113    }
114}
115
116impl ClusterSigner {
117    /// Generate a fresh keypair from the OS CSPRNG.
118    ///
119    /// The seed is drawn directly from the OS via `rand::rngs::OsRng`
120    /// (workspace `rand` 0.9) and fed to `SigningKey::from_bytes`. We avoid
121    /// `SigningKey::generate` here because `ed25519-dalek 2.x` requires
122    /// `rand_core 0.6`'s `CryptoRngCore` trait, while the workspace pins
123    /// `rand 0.9` (whose `OsRng` implements `rand_core 0.9`'s `TryRngCore`).
124    /// Filling 32 bytes via the workspace `rand` is equivalent: an Ed25519
125    /// signing key is just 32 random bytes.
126    ///
127    /// # Panics
128    /// Panics if the OS CSPRNG fails. This matches the behavior of
129    /// `SigningKey::generate(&mut OsRng)` and is appropriate because key
130    /// generation cannot proceed without entropy.
131    #[must_use]
132    pub fn generate() -> Self {
133        let mut seed = [0u8; SIGNING_KEY_SEED_LEN];
134        rand::rngs::OsRng
135            .try_fill_bytes(&mut seed)
136            .expect("OS CSPRNG must be available to generate a cluster signer key");
137        let signing = SigningKey::from_bytes(&seed);
138        let public = signing.verifying_key();
139        Self { signing, public }
140    }
141
142    /// Load the keypair from the on-disk keystore at `path` if present;
143    /// otherwise generate a fresh one, persist it as a JSON keystore with
144    /// mode 0600, and return it. Returns the `ClusterSigner` for the
145    /// currently-active key.
146    ///
147    /// If the file exists in the legacy raw-32-byte format (Wave 1), it is
148    /// transparently migrated to the JSON keystore layout in place. The
149    /// migration is idempotent.
150    ///
151    /// The parent directory of `path` is created if it does not exist
152    /// (`mkdir -p` semantics).
153    ///
154    /// # Errors
155    /// - [`SecretsError::Storage`] if the parent directory cannot be created,
156    ///   if the file cannot be read or written, or if the file exists but is
157    ///   neither valid keystore JSON nor a 32-byte legacy seed.
158    /// - [`SecretsError::Storage`] if Unix file permissions cannot be set.
159    pub async fn load_or_generate(path: &Path) -> Result<Self, SecretsError> {
160        // 1. Ensure the parent directory exists.
161        if let Some(parent) = path.parent() {
162            fs::create_dir_all(parent).await.map_err(|e| {
163                SecretsError::Storage(format!(
164                    "Failed to create cluster signer directory {}: {e}",
165                    parent.display()
166                ))
167            })?;
168        }
169
170        // 2. If the file does not exist, generate fresh and persist.
171        if !fs::try_exists(path).await.map_err(|e| {
172            SecretsError::Storage(format!(
173                "Failed to stat cluster signer key file {}: {e}",
174                path.display()
175            ))
176        })? {
177            let signer = Self::generate();
178            let entry = KeyEntry {
179                id: signer.key_id(),
180                seed_b64: URL_SAFE_NO_PAD.encode(signer.signing.to_bytes()),
181                created_at: Utc::now(),
182            };
183            let store = KeyStoreFile {
184                version: KEYSTORE_VERSION,
185                active: entry.id.clone(),
186                keys: vec![entry],
187                retired_grace_until: HashMap::new(),
188            };
189            Self::write_keystore(path, &store).await?;
190            return Ok(signer);
191        }
192
193        // 3. File exists — read it. `read_keystore` handles both the JSON
194        //    format and the legacy 32-byte format, migrating the latter in
195        //    place.
196        let store = Self::read_keystore(path).await?;
197        Self::from_keystore(&store, path)
198    }
199
200    /// Construct a `ClusterSigner` for the active key in `store`.
201    fn from_keystore(store: &KeyStoreFile, path: &Path) -> Result<Self, SecretsError> {
202        let active = store
203            .keys
204            .iter()
205            .find(|k| k.id == store.active)
206            .ok_or_else(|| {
207                SecretsError::Storage(format!(
208                    "cluster signer keystore {} declares active kid {:?} but no matching key entry exists",
209                    path.display(),
210                    store.active
211                ))
212            })?;
213
214        let seed_bytes = URL_SAFE_NO_PAD.decode(&active.seed_b64).map_err(|e| {
215            SecretsError::Storage(format!(
216                "cluster signer keystore {} has invalid base64 seed for kid {:?}: {e}",
217                path.display(),
218                active.id
219            ))
220        })?;
221
222        if seed_bytes.len() != SIGNING_KEY_SEED_LEN {
223            return Err(SecretsError::Storage(format!(
224                "cluster signer keystore {} has wrong seed length for kid {:?}: expected {}, got {}",
225                path.display(),
226                active.id,
227                SIGNING_KEY_SEED_LEN,
228                seed_bytes.len()
229            )));
230        }
231
232        let mut seed = [0u8; SIGNING_KEY_SEED_LEN];
233        seed.copy_from_slice(&seed_bytes);
234        let signing = SigningKey::from_bytes(&seed);
235        let public = signing.verifying_key();
236        Ok(Self { signing, public })
237    }
238
239    /// Read the on-disk keystore at `path`. If the file is in the legacy
240    /// raw-32-byte format from Wave 1, migrate it to the new JSON layout in
241    /// place and return the migrated structure.
242    ///
243    /// This helper is `pub(crate)` so subsequent Wave 5A agents (rotate,
244    /// multi-key validate, background prune) can reuse it without re-doing
245    /// the migration logic.
246    ///
247    /// # Errors
248    /// - [`SecretsError::Storage`] if the file cannot be read.
249    /// - [`SecretsError::Storage`] if the file is neither valid keystore JSON
250    ///   nor exactly 32 bytes (legacy format).
251    /// - [`SecretsError::Storage`] if the legacy 32-byte file fails to
252    ///   migrate (e.g., the rewrite step errors).
253    pub(crate) async fn read_keystore(path: &Path) -> Result<KeyStoreFile, SecretsError> {
254        let buf = fs::read(path).await.map_err(|e| {
255            SecretsError::Storage(format!(
256                "Failed to read cluster signer key file {}: {e}",
257                path.display()
258            ))
259        })?;
260
261        // Try JSON keystore first.
262        match serde_json::from_slice::<KeyStoreFile>(&buf) {
263            Ok(store) => Ok(store),
264            Err(json_err) => {
265                // Maybe legacy raw-seed format.
266                if buf.len() == SIGNING_KEY_SEED_LEN {
267                    let mut seed = [0u8; SIGNING_KEY_SEED_LEN];
268                    seed.copy_from_slice(&buf);
269                    let signing = SigningKey::from_bytes(&seed);
270                    let public = signing.verifying_key();
271                    let digest = Sha256::digest(public.as_bytes());
272                    let kid = hex_short(&digest);
273                    let entry = KeyEntry {
274                        id: kid.clone(),
275                        seed_b64: URL_SAFE_NO_PAD.encode(seed),
276                        created_at: Utc::now(),
277                    };
278                    let store = KeyStoreFile {
279                        version: KEYSTORE_VERSION,
280                        active: kid,
281                        keys: vec![entry],
282                        retired_grace_until: HashMap::new(),
283                    };
284                    Self::write_keystore(path, &store).await?;
285                    Ok(store)
286                } else {
287                    Err(SecretsError::Storage(format!(
288                        "cluster signer key file {} has unexpected format: not valid keystore JSON ({json_err}) and not a {SIGNING_KEY_SEED_LEN}-byte legacy seed (got {} bytes)",
289                        path.display(),
290                        buf.len()
291                    )))
292                }
293            }
294        }
295    }
296
297    /// Persist `store` to `path` atomically with Unix mode 0600.
298    ///
299    /// Writes to `{path}.tmp` first (with mode 0600 from the start on Unix,
300    /// so the seed bytes are never on disk under a more permissive mode),
301    /// then renames over `path`. On Unix the rename is atomic within the
302    /// same filesystem, so a crash mid-write cannot leave a half-written
303    /// keystore at `path`.
304    ///
305    /// This helper is `pub(crate)` so subsequent Wave 5A agents can reuse
306    /// it without re-implementing the atomic-write + 0600 dance.
307    ///
308    /// # Errors
309    /// - [`SecretsError::Storage`] if serialization fails (should be
310    ///   impossible for our types but propagated defensively).
311    /// - [`SecretsError::Storage`] if the temp file cannot be created,
312    ///   written, flushed, permissioned, or renamed.
313    pub(crate) async fn write_keystore(
314        path: &Path,
315        store: &KeyStoreFile,
316    ) -> Result<(), SecretsError> {
317        let json = serde_json::to_vec_pretty(store).map_err(|e| {
318            SecretsError::Storage(format!(
319                "Failed to serialize cluster signer keystore for {}: {e}",
320                path.display()
321            ))
322        })?;
323
324        let tmp = tmp_path_for(path);
325
326        #[cfg(unix)]
327        {
328            use std::os::unix::fs::PermissionsExt;
329            use tokio::fs::OpenOptions;
330            use tokio::io::AsyncWriteExt as _;
331
332            let mut file = OpenOptions::new()
333                .write(true)
334                .create(true)
335                .truncate(true)
336                .mode(0o600)
337                .open(&tmp)
338                .await
339                .map_err(|e| {
340                    SecretsError::Storage(format!(
341                        "Failed to create cluster signer keystore temp file {}: {e}",
342                        tmp.display()
343                    ))
344                })?;
345
346            file.write_all(&json).await.map_err(|e| {
347                SecretsError::Storage(format!(
348                    "Failed to write cluster signer keystore temp file {}: {e}",
349                    tmp.display()
350                ))
351            })?;
352            file.flush().await.map_err(|e| {
353                SecretsError::Storage(format!(
354                    "Failed to flush cluster signer keystore temp file {}: {e}",
355                    tmp.display()
356                ))
357            })?;
358
359            // Belt-and-suspenders: re-set 0600 on the temp file in case the
360            // FS/umask gave us anything broader.
361            let permissions = std::fs::Permissions::from_mode(0o600);
362            fs::set_permissions(&tmp, permissions).await.map_err(|e| {
363                SecretsError::Storage(format!(
364                    "Failed to set permissions on cluster signer keystore temp file {}: {e}",
365                    tmp.display()
366                ))
367            })?;
368        }
369
370        #[cfg(not(unix))]
371        {
372            fs::write(&tmp, &json).await.map_err(|e| {
373                SecretsError::Storage(format!(
374                    "Failed to write cluster signer keystore temp file {}: {e}",
375                    tmp.display()
376                ))
377            })?;
378        }
379
380        // Atomic rename over the destination.
381        fs::rename(&tmp, path).await.map_err(|e| {
382            SecretsError::Storage(format!(
383                "Failed to rename cluster signer keystore {} -> {}: {e}",
384                tmp.display(),
385                path.display()
386            ))
387        })?;
388
389        Ok(())
390    }
391
    /// The public verifying key.
    ///
    /// Returns a copy of the stored key; the private signing key is never
    /// exposed.
    #[must_use]
    pub fn verifying_key(&self) -> VerifyingKey {
        self.public
    }
397
    /// URL-safe no-pad base64 of the 32-byte verifying key.
    ///
    /// Only the public half of the keypair is encoded.
    #[must_use]
    pub fn public_key_b64(&self) -> String {
        URL_SAFE_NO_PAD.encode(self.public.as_bytes())
    }
403
404    /// First 8 hex chars of SHA-256(verifying_key bytes). Short, greppable
405    /// identifier for log lines and token headers.
406    #[must_use]
407    pub fn key_id(&self) -> String {
408        let digest = Sha256::digest(self.public.as_bytes());
409        hex_short(&digest)
410    }
411
412    /// Sign a message. Returns the raw 64-byte Ed25519 signature.
413    #[must_use]
414    pub fn sign(&self, msg: &[u8]) -> [u8; 64] {
415        let sig: Signature = self.signing.sign(msg);
416        sig.to_bytes()
417    }
418}
419
/// Render the first 4 bytes of `digest` as 8 lowercase hex characters — the
/// short, greppable `kid` form used in log lines, token headers, and
/// `CaCert::active_kid`.
///
/// # Panics
/// Panics if `digest` holds fewer than 4 bytes.
fn hex_short(digest: &[u8]) -> String {
    digest[..4]
        .iter()
        .map(|byte| format!("{byte:02x}"))
        .collect()
}
426
/// Derive the temp-file path used for atomic keystore writes: the
/// destination path with `.tmp` appended to its final component.
///
/// Appending a suffix (rather than choosing a random sibling name) keeps the
/// temp file in the destination's own directory, which is guaranteed to be
/// on the same filesystem, so the subsequent `rename` is atomic.
fn tmp_path_for(path: &Path) -> PathBuf {
    let mut name = path.to_path_buf().into_os_string();
    name.push(".tmp");
    name.into()
}
438
439/// Process-local mutex serializing read-modify-write operations on the
440/// keystore.
441///
442/// Rotations and grace-pruning are operator-driven (CLI / API) and rare,
443/// so a single global mutex is sufficient. The on-disk file write itself
444/// is already atomic via `rename`, but two concurrent rotations on the
445/// same process could still race when computing the new state (one reads
446/// the keystore before the other has written its update, then overwrites
447/// the other's work). The mutex prevents that lost-update window.
448///
449/// We do not key the mutex by path because (a) only one keystore is ever
450/// used per process in practice, (b) the operations are sub-millisecond,
451/// and (c) keying introduces a per-path registry that complicates
452/// lifetime management for no real benefit.
453fn keystore_lock() -> &'static Mutex<()> {
454    static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
455    LOCK.get_or_init(|| Mutex::new(()))
456}
457
/// Status of a key returned by [`list_valid_pubkeys`].
///
/// With `rename_all = "lowercase"` the variants serialize as `"active"` and
/// `"grace"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum PubkeyStatus {
    /// The currently-active signing key (used for newly-issued tokens).
    Active,
    /// A retired key still within its grace window, accepted for
    /// verification only.
    Grace,
}
468
/// One entry in the result of [`list_valid_pubkeys`].
///
/// Carries only public information about a key; the signing seed is never
/// part of this structure.
///
/// `valid_until` is `None` for [`PubkeyStatus::Active`] (the active key has
/// no scheduled expiration — it remains valid until the next rotation),
/// and `Some(_)` for [`PubkeyStatus::Grace`] indicating when the key will
/// be pruned and stop being accepted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PubkeyInfo {
    /// The key's short identifier (first 8 hex chars of SHA-256(pubkey)).
    pub kid: String,
    /// URL-safe no-pad base64 of the 32-byte verifying key.
    pub public_key_b64: String,
    /// Whether this key is the active signer or in grace.
    pub status: PubkeyStatus,
    /// For [`PubkeyStatus::Grace`], the timestamp this key stops being
    /// accepted. `None` for [`PubkeyStatus::Active`].
    pub valid_until: Option<DateTime<Utc>>,
    /// When this key was generated.
    pub created_at: DateTime<Utc>,
}
489
/// The outcome of a [`rotate_keystore`] call.
///
/// Named `KeystoreRotationResult` to disambiguate from the existing
/// `crate::RotationResult` which describes generic secret rotations.
///
/// Contains public identifiers and timestamps only — no private key
/// material.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeystoreRotationResult {
    /// The kid of the new active key.
    pub new_active_kid: String,
    /// URL-safe no-pad base64 of the new active key.
    pub new_active_public_key_b64: String,
    /// The kid that was previously active and is now in grace.
    pub previous_kid: String,
    /// When the previous-active key's grace period expires (RFC3339).
    /// After this, the key is purged on the next [`prune_expired_grace`].
    pub previous_grace_until: DateTime<Utc>,
}
506
507/// Rotate the cluster signing keystore at `path`:
508/// generate a fresh keypair, set it as active, and move the previous
509/// active key into the grace map with expiration `now + grace`.
510///
511/// The new key is the only signer used for future tokens; the previous
512/// key remains valid for token verification until `now + grace`, after
513/// which [`prune_expired_grace`] removes it.
514///
515/// # Concurrency
516/// This function takes a process-local mutex around the read-modify-write
517/// cycle so two concurrent rotations in the same process cannot lose each
518/// other's update. The mutex does NOT protect against cross-process races
519/// — but `ZLayer` runs a single daemon per host, and rotations are
520/// operator-driven (one CLI/API call at a time), so that scenario is
521/// out of scope.
522///
523/// # Errors
524/// - [`SecretsError::Storage`] if the keystore cannot be read or written.
525/// - [`SecretsError::Storage`] (`"kid collision; retry rotation"`) on the
526///   astronomically-unlikely event that the new kid collides with an
527///   existing entry. Callers should retry on this error.
528pub async fn rotate_keystore(
529    path: &Path,
530    grace: std::time::Duration,
531) -> Result<KeystoreRotationResult, SecretsError> {
532    let _guard = keystore_lock().lock().await;
533
534    // 1. Read existing keystore.
535    let mut store = ClusterSigner::read_keystore(path).await?;
536    let old_active = store.active.clone();
537
538    // 2. Generate fresh seed via OS CSPRNG.
539    let mut seed = [0u8; SIGNING_KEY_SEED_LEN];
540    rand::rngs::OsRng
541        .try_fill_bytes(&mut seed)
542        .map_err(|e| SecretsError::Storage(format!("OS CSPRNG failed during rotation: {e}")))?;
543    let signing = SigningKey::from_bytes(&seed);
544    let public = signing.verifying_key();
545    let new_kid = {
546        let digest = Sha256::digest(public.as_bytes());
547        hex_short(&digest)
548    };
549    let new_pub_b64 = URL_SAFE_NO_PAD.encode(public.as_bytes());
550
551    // 3. Refuse to overwrite an existing kid — even though the chance is
552    //    ~1 in 2^32, a real collision would silently invalidate the
553    //    previous key. Bail and let the caller retry.
554    if store.keys.iter().any(|k| k.id == new_kid) {
555        return Err(SecretsError::Storage(format!(
556            "kid collision; retry rotation (new_kid={new_kid} already in keystore)"
557        )));
558    }
559
560    // 4. Compute grace expiry. `chrono::Duration::from_std` only fails on
561    //    durations exceeding i64::MAX milliseconds — clamp defensively.
562    let now = Utc::now();
563    let grace_chrono = chrono::Duration::from_std(grace).unwrap_or(chrono::Duration::MAX);
564    let previous_grace_until = now
565        .checked_add_signed(grace_chrono)
566        .unwrap_or(DateTime::<Utc>::MAX_UTC);
567
568    // 5. Append new entry, retire old active, swap.
569    store.keys.push(KeyEntry {
570        id: new_kid.clone(),
571        seed_b64: URL_SAFE_NO_PAD.encode(seed),
572        created_at: now,
573    });
574    store
575        .retired_grace_until
576        .insert(old_active.clone(), previous_grace_until);
577    store.active.clone_from(&new_kid);
578
579    // 6. Persist atomically.
580    ClusterSigner::write_keystore(path, &store).await?;
581
582    Ok(KeystoreRotationResult {
583        new_active_kid: new_kid,
584        new_active_public_key_b64: new_pub_b64,
585        previous_kid: old_active,
586        previous_grace_until,
587    })
588}
589
590/// Load a [`ClusterSigner`] for a specific `kid` from the keystore at
591/// `path` if and only if that kid is currently trusted.
592///
593/// A kid is currently trusted when:
594/// - it is the active signing key (`store.active == kid`), OR
595/// - it is in `store.retired_grace_until` AND the recorded expiration is
596///   still in the future.
597///
598/// Returns `Ok(None)` if the kid is not in the keystore at all, or if it
599/// is in the keystore but its grace window has already elapsed (which
600/// shouldn't normally happen because [`prune_expired_grace`] removes such
601/// entries on the daemon's hourly tick).
602///
603/// This is the verify-side counterpart to [`rotate_keystore`]: token
604/// validation looks up the signer matching the token's `kid` header and
605/// returns `Some(_)` only when the key is currently valid.
606///
607/// # Errors
608/// - [`SecretsError::Storage`] if the keystore cannot be read or contains
609///   a malformed seed.
610pub async fn load_signer_for_kid(
611    path: &Path,
612    kid: &str,
613) -> Result<Option<ClusterSigner>, SecretsError> {
614    let store = ClusterSigner::read_keystore(path).await?;
615
616    let Some(entry) = store.keys.iter().find(|k| k.id == kid) else {
617        return Ok(None);
618    };
619
620    // Decide whether this kid is currently valid.
621    let valid = if store.active == kid {
622        true
623    } else if let Some(expires_at) = store.retired_grace_until.get(kid) {
624        Utc::now() < *expires_at
625    } else {
626        // Present in `keys` but neither active nor in grace — leftover
627        // entry that should have been pruned. Treat as invalid.
628        false
629    };
630
631    if !valid {
632        return Ok(None);
633    }
634
635    let seed_bytes = URL_SAFE_NO_PAD.decode(&entry.seed_b64).map_err(|e| {
636        SecretsError::Storage(format!(
637            "cluster signer keystore {} has invalid base64 seed for kid {:?}: {e}",
638            path.display(),
639            entry.id
640        ))
641    })?;
642    if seed_bytes.len() != SIGNING_KEY_SEED_LEN {
643        return Err(SecretsError::Storage(format!(
644            "cluster signer keystore {} has wrong seed length for kid {:?}: expected {}, got {}",
645            path.display(),
646            entry.id,
647            SIGNING_KEY_SEED_LEN,
648            seed_bytes.len()
649        )));
650    }
651    let mut seed = [0u8; SIGNING_KEY_SEED_LEN];
652    seed.copy_from_slice(&seed_bytes);
653    let signing = SigningKey::from_bytes(&seed);
654    let public = signing.verifying_key();
655    Ok(Some(ClusterSigner { signing, public }))
656}
657
658/// List every key in the keystore that is currently valid (active or
659/// in-grace and not yet expired).
660///
661/// Order: the active key first, then grace entries sorted by descending
662/// `valid_until` (closest-to-expiry-last, so newer grace entries come
663/// before older ones). Stale entries whose `retired_grace_until` is
664/// already in the past are omitted (they're pending prune).
665///
666/// # Errors
667/// - [`SecretsError::Storage`] if the keystore cannot be read.
668pub async fn list_valid_pubkeys(path: &Path) -> Result<Vec<PubkeyInfo>, SecretsError> {
669    let store = ClusterSigner::read_keystore(path).await?;
670    let now = Utc::now();
671
672    let mut active_info: Option<PubkeyInfo> = None;
673    let mut grace_infos: Vec<PubkeyInfo> = Vec::new();
674
675    for entry in &store.keys {
676        let seed_bytes = URL_SAFE_NO_PAD.decode(&entry.seed_b64).map_err(|e| {
677            SecretsError::Storage(format!(
678                "cluster signer keystore {} has invalid base64 seed for kid {:?}: {e}",
679                path.display(),
680                entry.id
681            ))
682        })?;
683        if seed_bytes.len() != SIGNING_KEY_SEED_LEN {
684            return Err(SecretsError::Storage(format!(
685                "cluster signer keystore {} has wrong seed length for kid {:?}: expected {}, got {}",
686                path.display(),
687                entry.id,
688                SIGNING_KEY_SEED_LEN,
689                seed_bytes.len()
690            )));
691        }
692        let mut seed = [0u8; SIGNING_KEY_SEED_LEN];
693        seed.copy_from_slice(&seed_bytes);
694        let public = SigningKey::from_bytes(&seed).verifying_key();
695        let public_key_b64 = URL_SAFE_NO_PAD.encode(public.as_bytes());
696
697        if entry.id == store.active {
698            active_info = Some(PubkeyInfo {
699                kid: entry.id.clone(),
700                public_key_b64,
701                status: PubkeyStatus::Active,
702                valid_until: None,
703                created_at: entry.created_at,
704            });
705        } else if let Some(expires_at) = store.retired_grace_until.get(&entry.id) {
706            if now < *expires_at {
707                grace_infos.push(PubkeyInfo {
708                    kid: entry.id.clone(),
709                    public_key_b64,
710                    status: PubkeyStatus::Grace,
711                    valid_until: Some(*expires_at),
712                    created_at: entry.created_at,
713                });
714            }
715            // else: expired, pending prune — skip.
716        }
717        // else: leftover entry with no grace record; skip.
718    }
719
720    // Sort grace by descending `valid_until` (latest expiry first).
721    grace_infos.sort_by(|a, b| b.valid_until.cmp(&a.valid_until));
722
723    let mut out = Vec::with_capacity(1 + grace_infos.len());
724    if let Some(active) = active_info {
725        out.push(active);
726    }
727    out.extend(grace_infos);
728    Ok(out)
729}
730
731/// Remove every keystore entry whose grace window has expired.
732///
733/// Iterates `retired_grace_until`, drops every kid whose expiration is
734/// `<= now`, and also removes the matching entries from `store.keys`.
735/// Persists the keystore only if at least one key was pruned (so the
736/// hot-loop "nothing to do" path performs zero I/O writes).
737///
738/// Wave 5A.5 wires a background daemon task to call this hourly.
739///
740/// # Errors
741/// - [`SecretsError::Storage`] if the keystore cannot be read or written.
742///
743/// # Returns
744/// The number of keys pruned.
745pub async fn prune_expired_grace(path: &Path) -> Result<usize, SecretsError> {
746    let _guard = keystore_lock().lock().await;
747
748    let mut store = ClusterSigner::read_keystore(path).await?;
749    let now = Utc::now();
750
751    // Collect kids whose grace has expired.
752    let expired: Vec<String> = store
753        .retired_grace_until
754        .iter()
755        .filter_map(|(kid, expires)| {
756            if *expires <= now {
757                Some(kid.clone())
758            } else {
759                None
760            }
761        })
762        .collect();
763
764    if expired.is_empty() {
765        return Ok(0);
766    }
767
768    // Drop them from both maps. We never prune the active key even if
769    // (somehow) it ended up in `retired_grace_until`.
770    for kid in &expired {
771        if *kid == store.active {
772            continue;
773        }
774        store.retired_grace_until.remove(kid);
775        store.keys.retain(|k| &k.id != kid);
776    }
777
778    let pruned = expired.iter().filter(|k| **k != store.active).count();
779    if pruned == 0 {
780        // We only had the active-key edge case above.
781        return Ok(0);
782    }
783
784    ClusterSigner::write_keystore(path, &store).await?;
785    Ok(pruned)
786}
787
788/// Long-lived cluster CA keypair.
789///
790/// Identifies this cluster across the entire federation. NEVER
791/// rotated — rotation would invalidate every `CaCert` this cluster has
792/// ever issued and break federation trust. Stored as a raw 32-byte
793/// Ed25519 seed at `{data_dir}/cluster_ca.key` (0600).
794///
795/// The per-rotation signing keys (see [`ClusterSigner`]) live in a
796/// separate JSON keystore at `cluster_signing.key`. The CA key is
797/// deliberately not part of that keystore so it survives the
798/// rotation-and-prune machinery.
799pub struct ClusterCa {
800    signing: ed25519_dalek::SigningKey,
801    public: ed25519_dalek::VerifyingKey,
802}
803
804impl std::fmt::Debug for ClusterCa {
805    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
806        f.debug_struct("ClusterCa")
807            .field("ca_kid", &self.ca_kid())
808            .finish_non_exhaustive()
809    }
810}
811
812impl ClusterCa {
813    /// Generate a fresh CA keypair using the OS CSPRNG.
814    ///
815    /// # Panics
816    ///
817    /// Panics if the OS CSPRNG fails — same fail-loud behavior as
818    /// [`ClusterSigner::generate`].
819    #[must_use]
820    pub fn generate() -> Self {
821        use rand::TryRngCore;
822        let mut seed = [0u8; 32];
823        rand::rngs::OsRng
824            .try_fill_bytes(&mut seed)
825            .expect("OS CSPRNG must be available to generate a cluster CA key");
826        let signing = ed25519_dalek::SigningKey::from_bytes(&seed);
827        let public = signing.verifying_key();
828        Self { signing, public }
829    }
830
831    /// Load the CA seed from `path`, or generate + persist a fresh one
832    /// if the file does not exist.
833    ///
834    /// On creation the file is written atomically (tmp + rename) with
835    /// mode 0600 on Unix. The 32-byte seed is the entirety of the
836    /// file — no JSON, no headers; this is intentionally a simpler
837    /// format than the signing keystore because the CA key never
838    /// rotates.
839    ///
840    /// # Errors
841    ///
842    /// - [`SecretsError::Storage`] for any IO error reading or writing
843    ///   the file, or if an existing file has the wrong length.
844    pub async fn load_or_generate(path: &std::path::Path) -> Result<Self, SecretsError> {
845        if tokio::fs::try_exists(path)
846            .await
847            .map_err(|e| SecretsError::Storage(format!("checking {}: {e}", path.display())))?
848        {
849            let bytes = tokio::fs::read(path)
850                .await
851                .map_err(|e| SecretsError::Storage(format!("reading {}: {e}", path.display())))?;
852            if bytes.len() != 32 {
853                return Err(SecretsError::Storage(format!(
854                    "cluster_ca.key at {} has wrong length: expected 32 bytes, got {}",
855                    path.display(),
856                    bytes.len()
857                )));
858            }
859            let mut seed = [0u8; 32];
860            seed.copy_from_slice(&bytes);
861            let signing = ed25519_dalek::SigningKey::from_bytes(&seed);
862            let public = signing.verifying_key();
863            return Ok(Self { signing, public });
864        }
865
866        let ca = Self::generate();
867        let seed = ca.signing.to_bytes();
868        let tmp_path = path.with_extension("ca.tmp");
869        tokio::fs::write(&tmp_path, &seed[..])
870            .await
871            .map_err(|e| SecretsError::Storage(format!("writing tmp ca file: {e}")))?;
872        #[cfg(unix)]
873        {
874            use std::os::unix::fs::PermissionsExt;
875            let perms = std::fs::Permissions::from_mode(0o600);
876            tokio::fs::set_permissions(&tmp_path, perms)
877                .await
878                .map_err(|e| SecretsError::Storage(format!("chmod 0600 ca tmp: {e}")))?;
879        }
880        tokio::fs::rename(&tmp_path, path)
881            .await
882            .map_err(|e| SecretsError::Storage(format!("rename ca tmp to final: {e}")))?;
883        Ok(ca)
884    }
885
886    /// CA verifying key as URL-safe no-pad base64.
887    #[must_use]
888    pub fn ca_public_key_b64(&self) -> String {
889        URL_SAFE_NO_PAD.encode(self.public.as_bytes())
890    }
891
892    /// Short CA key id: first 8 hex chars of SHA-256(CA verifying key bytes).
893    #[must_use]
894    pub fn ca_kid(&self) -> String {
895        let mut hasher = Sha256::new();
896        hasher.update(self.public.as_bytes());
897        let digest = hasher.finalize();
898        hex_short(&digest)
899    }
900
901    /// CA verifying key for verification (not exported through trait).
902    #[must_use]
903    pub fn verifying_key(&self) -> ed25519_dalek::VerifyingKey {
904        self.public
905    }
906
907    /// Build a [`CaCert`] binding `active_kid` (whose verifying-key
908    /// base64 is `active_pubkey_b64`) to `cluster_domain` for the
909    /// window `[now, now + grace]`. The result's `sig_by_ca` is the
910    /// CA's Ed25519 signature over the canonical bytes of the body.
911    ///
912    /// "Canonical bytes" = `serde_json::to_vec` of a `CaCert` with the
913    /// signature field cleared (`""`). Verifiers must do the same
914    /// transformation before calling `verify_strict`.
915    ///
916    /// # Errors
917    ///
918    /// - [`SecretsError::Provider`] if serializing the canonical body
919    ///   fails (should not happen with the well-formed `CaCert` struct).
920    pub fn issue_ca_cert(
921        &self,
922        active_kid: String,
923        active_pubkey_b64: String,
924        cluster_domain: String,
925        grace: std::time::Duration,
926    ) -> Result<zlayer_types::api::cluster::CaCert, SecretsError> {
927        use ed25519_dalek::Signer;
928
929        let now = chrono::Utc::now();
930        let issued_at = now.to_rfc3339();
931        let expires_at = (now
932            + chrono::Duration::from_std(grace)
933                .map_err(|e| SecretsError::Provider(format!("grace out of range: {e}")))?)
934        .to_rfc3339();
935
936        let mut cert = zlayer_types::api::cluster::CaCert {
937            v: zlayer_types::api::cluster::CA_CERT_FORMAT_VERSION,
938            active_kid,
939            active_pubkey_b64,
940            issued_at,
941            expires_at,
942            cluster_domain,
943            sig_by_ca: String::new(),
944        };
945        let body_bytes = serde_json::to_vec(&cert).map_err(|e| {
946            SecretsError::Provider(format!("serializing CaCert body for signing: {e}"))
947        })?;
948        let sig = self.signing.sign(&body_bytes);
949        cert.sig_by_ca = URL_SAFE_NO_PAD.encode(sig.to_bytes());
950        Ok(cert)
951    }
952
953    /// Verify a `CaCert` against a CA public key.
954    ///
955    /// The CA public key is the bytes that an importer obtained
956    /// out-of-band from a [`crate::TrustBundle`]. Verification:
957    /// 1. Decodes the `sig_by_ca` base64.
958    /// 2. Recomputes the canonical body bytes (`sig_by_ca` cleared).
959    /// 3. Calls `verify_strict` on the CA pubkey.
960    /// 4. Checks `expires_at > now` so an expired cert is rejected.
961    ///
962    /// Does NOT check `cluster_domain` — that's the caller's job
963    /// (typically: "does the cert's `cluster_domain` match the
964    /// `TrustBundle` we looked up by domain?").
965    ///
966    /// # Errors
967    ///
968    /// - [`SecretsError::Provider`] for any decode/verification/expiry
969    ///   failure with an actionable message.
970    pub fn verify_ca_cert(
971        ca_pubkey_b64: &str,
972        cert: &zlayer_types::api::cluster::CaCert,
973    ) -> Result<(), SecretsError> {
974        let ca_pubkey_bytes = URL_SAFE_NO_PAD
975            .decode(ca_pubkey_b64.as_bytes())
976            .map_err(|e| SecretsError::Provider(format!("CA pubkey base64 decode: {e}")))?;
977        let ca_pubkey_arr: [u8; 32] = ca_pubkey_bytes.as_slice().try_into().map_err(|_| {
978            SecretsError::Provider(format!("CA pubkey wrong length: {}", ca_pubkey_bytes.len()))
979        })?;
980        let ca_pubkey = ed25519_dalek::VerifyingKey::from_bytes(&ca_pubkey_arr)
981            .map_err(|e| SecretsError::Provider(format!("invalid CA pubkey: {e}")))?;
982
983        let sig_bytes = URL_SAFE_NO_PAD
984            .decode(cert.sig_by_ca.as_bytes())
985            .map_err(|e| SecretsError::Provider(format!("sig_by_ca base64 decode: {e}")))?;
986        let sig_arr: [u8; 64] = sig_bytes.as_slice().try_into().map_err(|_| {
987            SecretsError::Provider(format!("sig_by_ca wrong length: {}", sig_bytes.len()))
988        })?;
989        let sig = ed25519_dalek::Signature::from_bytes(&sig_arr);
990
991        let mut body = cert.clone();
992        body.sig_by_ca = String::new();
993        let body_bytes = serde_json::to_vec(&body).map_err(|e| {
994            SecretsError::Provider(format!("recomputing CaCert canonical body: {e}"))
995        })?;
996        ca_pubkey.verify_strict(&body_bytes, &sig).map_err(|e| {
997            SecretsError::Provider(format!("CA signature verification failed: {e}"))
998        })?;
999
1000        let exp = chrono::DateTime::parse_from_rfc3339(&cert.expires_at)
1001            .map_err(|e| SecretsError::Provider(format!("CaCert expires_at parse: {e}")))?
1002            .with_timezone(&chrono::Utc);
1003        if chrono::Utc::now() >= exp {
1004            return Err(SecretsError::Provider(format!(
1005                "CaCert expired at {}",
1006                cert.expires_at
1007            )));
1008        }
1009        Ok(())
1010    }
1011}
1012
1013// ---------------------------------------------------------------------------
1014// SigningBackend trait (Wave 8 — minimal scope)
1015//
1016// The trait abstracts where the keystore lives so future hardware-backed
1017// implementations (TPM 2.0, YubiHSM, cloud KMS) can swap in without
1018// touching call sites. The current shipping implementation is `FileBackend`,
1019// which delegates to the JSON keystore on disk via the existing free
1020// functions. TPM/YubiHSM impls are deliberately out of scope for this
1021// wave — the trait is the extension point only.
1022// ---------------------------------------------------------------------------
1023
/// Abstract interface over a cluster-signing-key store.
///
/// Implementations may live on local disk (`FileBackend`), in a TPM,
/// in an HSM, or in a cloud KMS — the trait keeps the call sites
/// agnostic. The default and only shipped implementation today is
/// [`FileBackend`].
///
/// All fallible methods are async because hardware backends inevitably
/// involve IO (TPM sessions, network round-trips to KMS, etc.). The file
/// backend's IO is the existing tokio-fs path. `name` and
/// `is_hardware_backed` are synchronous.
#[async_trait::async_trait]
pub trait SigningBackend: Send + Sync + std::fmt::Debug {
    /// Returns a human-readable name (`"file"`, `"tpm"`, …) for log
    /// lines and `--key-store-backend` debug output.
    fn name(&self) -> &'static str;

    /// Returns `true` if private key material lives in tamper-resistant
    /// hardware. Pure-software backends return `false`. Used for
    /// startup logging and the "key-store-backend: tpm (hw-backed)"
    /// banner.
    fn is_hardware_backed(&self) -> bool;

    /// Key id of the currently-active key: lowercase hex, the first 8
    /// characters of the SHA-256 of the active verifying key. Stable
    /// across processes for the same key.
    async fn active_key_id(&self) -> Result<String, SecretsError>;

    /// URL-safe no-pad base64 of the verifying key bytes for the given
    /// `kid`. Returns `Ok(None)` if the kid is unknown or its grace
    /// window has expired.
    async fn public_key_b64(&self, kid: &str) -> Result<Option<String>, SecretsError>;

    /// All currently-valid (active OR not-yet-expired grace) keys in
    /// the store with their statuses.
    async fn list_valid_pubkeys(&self) -> Result<Vec<PubkeyInfo>, SecretsError>;

    /// Sign `msg` with the key identified by `kid` and return the raw
    /// 64-byte signature. Fails with `SecretsError::Provider` if the
    /// kid is unknown or expired. Note that signing with a grace-window
    /// key is unusual (callers typically only sign with the active key)
    /// but supported for recovery scenarios.
    async fn sign(&self, kid: &str, msg: &[u8]) -> Result<[u8; 64], SecretsError>;

    /// Rotate the keystore: generate a new active key, move the
    /// previous active into the grace window for `grace`, and return
    /// the rotation result (new active kid + public key). NOT
    /// idempotent: every call performs a fresh rotation, so calling it
    /// twice produces two rotations.
    async fn rotate(
        &self,
        grace: std::time::Duration,
    ) -> Result<KeystoreRotationResult, SecretsError>;

    /// Prune any grace-window entries whose retention has elapsed.
    /// Returns the count of pruned entries. Called periodically by the
    /// daemon's keystore sweep task.
    async fn prune_expired_grace(&self) -> Result<usize, SecretsError>;
}
1080
/// `SigningBackend` implementation backed by the on-disk JSON keystore.
///
/// This is a thin adapter over the existing JSON-keystore free functions
/// (`load_signer_for_kid`, `rotate_keystore`, `list_valid_pubkeys`,
/// `prune_expired_grace`). Each call opens the file fresh so external
/// edits (e.g., the daemon's hourly sweep) are picked up without
/// needing a cache-invalidation hook.
///
/// Private key material lives on disk encrypted only by filesystem
/// permissions (0600 owner-only). For tamper-resistant storage use a
/// future TPM or `YubiHSM` backend.
#[derive(Debug, Clone)]
pub struct FileBackend {
    /// Location of the JSON keystore file this backend operates on.
    path: PathBuf,
}
1096
1097impl FileBackend {
1098    /// Create a new `FileBackend` rooted at `path`. The path is the
1099    /// JSON keystore file; if it does not yet exist, the first
1100    /// operation that requires it (any of the trait methods) will
1101    /// create it via `load_or_generate`.
1102    #[must_use]
1103    pub fn new(path: std::path::PathBuf) -> Self {
1104        Self { path }
1105    }
1106
1107    /// Path to the JSON keystore this backend wraps. Useful for
1108    /// debugging / migration tooling.
1109    #[must_use]
1110    pub fn path(&self) -> &Path {
1111        &self.path
1112    }
1113}
1114
1115#[async_trait::async_trait]
1116impl SigningBackend for FileBackend {
1117    fn name(&self) -> &'static str {
1118        "file"
1119    }
1120
1121    fn is_hardware_backed(&self) -> bool {
1122        false
1123    }
1124
1125    async fn active_key_id(&self) -> Result<String, SecretsError> {
1126        // `load_or_generate` returns the *active* signer; its `key_id`
1127        // is the answer. Idempotent — does nothing if the file
1128        // already exists with a valid active entry.
1129        let signer = ClusterSigner::load_or_generate(&self.path).await?;
1130        Ok(signer.key_id())
1131    }
1132
1133    async fn public_key_b64(&self, kid: &str) -> Result<Option<String>, SecretsError> {
1134        Ok(load_signer_for_kid(&self.path, kid)
1135            .await?
1136            .map(|s| s.public_key_b64()))
1137    }
1138
1139    async fn list_valid_pubkeys(&self) -> Result<Vec<PubkeyInfo>, SecretsError> {
1140        list_valid_pubkeys(&self.path).await
1141    }
1142
1143    async fn sign(&self, kid: &str, msg: &[u8]) -> Result<[u8; 64], SecretsError> {
1144        let signer = load_signer_for_kid(&self.path, kid).await?.ok_or_else(|| {
1145            SecretsError::Provider(format!(
1146                "kid {kid} not in keystore (unknown or grace expired)"
1147            ))
1148        })?;
1149        Ok(signer.sign(msg))
1150    }
1151
1152    async fn rotate(
1153        &self,
1154        grace: std::time::Duration,
1155    ) -> Result<KeystoreRotationResult, SecretsError> {
1156        rotate_keystore(&self.path, grace).await
1157    }
1158
1159    async fn prune_expired_grace(&self) -> Result<usize, SecretsError> {
1160        prune_expired_grace(&self.path).await
1161    }
1162}
1163
1164#[cfg(test)]
1165mod tests {
1166    use super::*;
1167    use ed25519_dalek::{Signature, Verifier};
1168    use tempfile::tempdir;
1169
1170    #[test]
1171    fn generate_produces_distinct_keys() {
1172        let a = ClusterSigner::generate();
1173        let b = ClusterSigner::generate();
1174        assert_ne!(
1175            a.key_id(),
1176            b.key_id(),
1177            "two fresh generate() calls produced the same key_id"
1178        );
1179        assert_ne!(a.verifying_key().as_bytes(), b.verifying_key().as_bytes());
1180    }
1181
1182    #[tokio::test]
1183    async fn round_trip_through_disk() {
1184        let dir = tempdir().unwrap();
1185        let path = dir.path().join("cluster_signer.key");
1186
1187        let first = ClusterSigner::load_or_generate(&path).await.unwrap();
1188        let first_id = first.key_id();
1189
1190        // File should now exist as a JSON keystore.
1191        assert!(path.exists(), "key file was not persisted");
1192        let on_disk = std::fs::read_to_string(&path).unwrap();
1193        assert!(
1194            on_disk.starts_with('{'),
1195            "expected JSON keystore, got: {on_disk:?}"
1196        );
1197
1198        // Second call must load, not regenerate.
1199        let second = ClusterSigner::load_or_generate(&path).await.unwrap();
1200        assert_eq!(
1201            first_id,
1202            second.key_id(),
1203            "second load_or_generate regenerated instead of loading"
1204        );
1205        assert_eq!(
1206            first.verifying_key().as_bytes(),
1207            second.verifying_key().as_bytes()
1208        );
1209    }
1210
1211    #[test]
1212    fn sign_then_verify() {
1213        let signer = ClusterSigner::generate();
1214        let msg = b"join-token-payload-v1";
1215
1216        let sig_bytes = signer.sign(msg);
1217        let sig = Signature::from_bytes(&sig_bytes);
1218        signer
1219            .verifying_key()
1220            .verify(msg, &sig)
1221            .expect("valid signature should verify");
1222
1223        // Flipping a bit in the message must invalidate the signature.
1224        let mut tampered = msg.to_vec();
1225        tampered[0] ^= 0x01;
1226        assert!(
1227            signer.verifying_key().verify(&tampered, &sig).is_err(),
1228            "tampered message verified against original signature"
1229        );
1230
1231        // Flipping a bit in the signature must also fail.
1232        let mut bad_sig_bytes = sig_bytes;
1233        bad_sig_bytes[0] ^= 0x01;
1234        let bad_sig = Signature::from_bytes(&bad_sig_bytes);
1235        assert!(
1236            signer.verifying_key().verify(msg, &bad_sig).is_err(),
1237            "tampered signature verified against original message"
1238        );
1239    }
1240
1241    #[test]
1242    fn key_id_is_8_hex_chars() {
1243        let signer = ClusterSigner::generate();
1244        let id = signer.key_id();
1245        assert_eq!(id.len(), 8, "key_id should be 8 hex chars, got {id:?}");
1246        assert!(
1247            id.chars().all(|c| c.is_ascii_hexdigit()),
1248            "key_id should be hex, got {id:?}"
1249        );
1250    }
1251
1252    #[test]
1253    fn public_key_b64_round_trips() {
1254        let signer = ClusterSigner::generate();
1255        let b64 = signer.public_key_b64();
1256        // URL_SAFE_NO_PAD of 32 bytes = ceil(32 * 4 / 3) = 43 chars, no padding.
1257        assert_eq!(b64.len(), 43);
1258        let decoded = URL_SAFE_NO_PAD.decode(&b64).unwrap();
1259        assert_eq!(decoded, signer.verifying_key().as_bytes());
1260    }
1261
1262    /// Garbage files must be rejected. The new format is JSON, so a 16-byte
1263    /// blob is neither valid keystore JSON nor a 32-byte legacy seed and
1264    /// must produce a `SecretsError::Storage`.
1265    #[tokio::test]
1266    async fn load_or_generate_rejects_garbage_file() {
1267        let dir = tempdir().unwrap();
1268        let path = dir.path().join("cluster_signer.key");
1269        std::fs::write(&path, [0u8; 16]).unwrap();
1270
1271        let err = ClusterSigner::load_or_generate(&path)
1272            .await
1273            .expect_err("should reject 16-byte garbage file");
1274        match err {
1275            SecretsError::Storage(msg) => {
1276                assert!(
1277                    msg.contains("unexpected format"),
1278                    "expected 'unexpected format' error, got: {msg}"
1279                );
1280            }
1281            other => panic!("expected SecretsError::Storage, got {other:?}"),
1282        }
1283    }
1284
1285    #[cfg(unix)]
1286    #[tokio::test]
1287    async fn persisted_file_is_mode_0600_on_unix() {
1288        use std::os::unix::fs::PermissionsExt;
1289
1290        let dir = tempdir().unwrap();
1291        let path = dir.path().join("cluster_signer.key");
1292        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1293
1294        let mode = std::fs::metadata(&path).unwrap().permissions().mode();
1295        assert_eq!(mode & 0o777, 0o600, "expected mode 0600, got {mode:o}");
1296    }
1297
1298    /// A pre-existing 32-byte raw-seed file (Wave 1 format) must be migrated
1299    /// in place on first load, and the resulting keystore must produce the
1300    /// same signing key on subsequent loads.
1301    #[tokio::test]
1302    async fn migration_from_raw_seed_file_works_once() {
1303        let dir = tempdir().unwrap();
1304        let path = dir.path().join("cluster_signing.key");
1305
1306        // 1. Write a legacy 32-byte file.
1307        let mut legacy_seed = [0u8; 32];
1308        rand::rngs::OsRng.try_fill_bytes(&mut legacy_seed).unwrap();
1309        std::fs::write(&path, legacy_seed).unwrap();
1310
1311        // 2. First load_or_generate triggers migration.
1312        let signer = ClusterSigner::load_or_generate(&path).await.unwrap();
1313        let migrated_kid = signer.key_id();
1314
1315        // 3. File should now be JSON.
1316        let content = std::fs::read_to_string(&path).unwrap();
1317        assert!(
1318            content.starts_with('{'),
1319            "expected JSON keystore after migration, got: {content:?}"
1320        );
1321        assert!(content.contains("\"version\":"));
1322        assert!(content.contains("\"active\":"));
1323
1324        // 4. Re-loading gives the same key.
1325        let again = ClusterSigner::load_or_generate(&path).await.unwrap();
1326        assert_eq!(again.key_id(), migrated_kid);
1327
1328        // 5. Public key bytes match — confirms the seed survived migration.
1329        assert_eq!(
1330            signer.verifying_key().as_bytes(),
1331            again.verifying_key().as_bytes()
1332        );
1333    }
1334
1335    #[tokio::test]
1336    async fn fresh_load_or_generate_produces_json_keystore() {
1337        let dir = tempdir().unwrap();
1338        let path = dir.path().join("cluster_signing.key");
1339        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1340
1341        let content = std::fs::read_to_string(&path).unwrap();
1342        assert!(content.contains("\"version\":"));
1343        assert!(content.contains("\"active\":"));
1344        assert!(content.contains("\"keys\":"));
1345    }
1346
1347    #[tokio::test]
1348    async fn load_or_generate_idempotent_on_keystore() {
1349        let dir = tempdir().unwrap();
1350        let path = dir.path().join("cluster_signing.key");
1351
1352        let first = ClusterSigner::load_or_generate(&path).await.unwrap();
1353        let json1 = std::fs::read_to_string(&path).unwrap();
1354
1355        let second = ClusterSigner::load_or_generate(&path).await.unwrap();
1356        let json2 = std::fs::read_to_string(&path).unwrap();
1357
1358        assert_eq!(first.key_id(), second.key_id());
1359        assert_eq!(
1360            json1, json2,
1361            "keystore should not be rewritten on a no-op load"
1362        );
1363    }
1364
1365    #[cfg(unix)]
1366    #[tokio::test]
1367    async fn keystore_file_is_mode_0600_on_unix() {
1368        use std::os::unix::fs::PermissionsExt;
1369
1370        let dir = tempdir().unwrap();
1371        let path = dir.path().join("cluster_signing.key");
1372        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1373
1374        let mode = std::fs::metadata(&path).unwrap().permissions().mode();
1375        assert_eq!(mode & 0o777, 0o600);
1376    }
1377
1378    // -------------------------------------------------------------------
1379    // Wave 5A.2 — rotation, multi-kid lookup, and grace pruning.
1380    // -------------------------------------------------------------------
1381
1382    /// Helper: read the keystore file directly off disk for assertions.
1383    async fn read_store(path: &Path) -> KeyStoreFile {
1384        ClusterSigner::read_keystore(path).await.unwrap()
1385    }
1386
1387    #[tokio::test]
1388    async fn rotation_flips_active_and_old_keeps_grace() {
1389        let dir = tempdir().unwrap();
1390        let path = dir.path().join("cluster_signing.key");
1391
1392        let original = ClusterSigner::load_or_generate(&path).await.unwrap();
1393        let original_kid = original.key_id();
1394
1395        let result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1396            .await
1397            .unwrap();
1398        assert_ne!(result.new_active_kid, result.previous_kid);
1399        assert_eq!(result.previous_kid, original_kid);
1400
1401        let store = read_store(&path).await;
1402        assert_eq!(store.active, result.new_active_kid);
1403        assert_eq!(store.keys.len(), 2);
1404        assert!(store.retired_grace_until.contains_key(&result.previous_kid));
1405        // The new active key must NOT have a grace entry.
1406        assert!(!store
1407            .retired_grace_until
1408            .contains_key(&result.new_active_kid));
1409    }
1410
1411    #[tokio::test]
1412    async fn rotation_returns_correct_grace_expiry() {
1413        let dir = tempdir().unwrap();
1414        let path = dir.path().join("cluster_signing.key");
1415        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1416
1417        let before = Utc::now();
1418        let result = rotate_keystore(&path, std::time::Duration::from_secs(7200))
1419            .await
1420            .unwrap();
1421        let after = Utc::now();
1422
1423        // grace_until should be roughly `before + 2h` ..= `after + 2h`.
1424        let lower = before + chrono::Duration::seconds(7200);
1425        let upper = after + chrono::Duration::seconds(7200);
1426        assert!(
1427            result.previous_grace_until >= lower && result.previous_grace_until <= upper,
1428            "grace_until {:?} not within expected window [{:?}, {:?}]",
1429            result.previous_grace_until,
1430            lower,
1431            upper,
1432        );
1433
1434        // Persisted value must match the returned value.
1435        let store = read_store(&path).await;
1436        assert_eq!(
1437            store.retired_grace_until.get(&result.previous_kid).copied(),
1438            Some(result.previous_grace_until)
1439        );
1440    }
1441
1442    #[tokio::test]
1443    async fn load_signer_for_kid_returns_active() {
1444        let dir = tempdir().unwrap();
1445        let path = dir.path().join("cluster_signing.key");
1446        let active = ClusterSigner::load_or_generate(&path).await.unwrap();
1447
1448        let loaded = load_signer_for_kid(&path, &active.key_id())
1449            .await
1450            .unwrap()
1451            .expect("active kid should load");
1452        assert_eq!(loaded.key_id(), active.key_id());
1453        assert_eq!(
1454            loaded.verifying_key().as_bytes(),
1455            active.verifying_key().as_bytes()
1456        );
1457    }
1458
1459    #[tokio::test]
1460    async fn load_signer_for_kid_returns_grace() {
1461        let dir = tempdir().unwrap();
1462        let path = dir.path().join("cluster_signing.key");
1463        let original = ClusterSigner::load_or_generate(&path).await.unwrap();
1464        let original_kid = original.key_id();
1465
1466        let _result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1467            .await
1468            .unwrap();
1469
1470        let loaded = load_signer_for_kid(&path, &original_kid)
1471            .await
1472            .unwrap()
1473            .expect("grace-period kid should still load");
1474        assert_eq!(loaded.key_id(), original_kid);
1475        // It must produce the same verifying key as the original instance.
1476        assert_eq!(
1477            loaded.verifying_key().as_bytes(),
1478            original.verifying_key().as_bytes()
1479        );
1480    }
1481
1482    #[tokio::test]
1483    async fn load_signer_for_kid_returns_none_for_unknown() {
1484        let dir = tempdir().unwrap();
1485        let path = dir.path().join("cluster_signing.key");
1486        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1487
1488        let loaded = load_signer_for_kid(&path, "deadbeef").await.unwrap();
1489        assert!(loaded.is_none(), "unknown kid should return None");
1490    }
1491
1492    #[tokio::test]
1493    async fn load_signer_for_kid_returns_none_for_expired_grace() {
1494        let dir = tempdir().unwrap();
1495        let path = dir.path().join("cluster_signing.key");
1496        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1497
1498        // Rotate to populate `retired_grace_until`, then manually rewind the
1499        // expiration into the past.
1500        let result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1501            .await
1502            .unwrap();
1503
1504        let mut store = read_store(&path).await;
1505        let past = Utc::now() - chrono::Duration::seconds(60);
1506        store
1507            .retired_grace_until
1508            .insert(result.previous_kid.clone(), past);
1509        ClusterSigner::write_keystore(&path, &store).await.unwrap();
1510
1511        let loaded = load_signer_for_kid(&path, &result.previous_kid)
1512            .await
1513            .unwrap();
1514        assert!(
1515            loaded.is_none(),
1516            "kid with expired grace must not load via load_signer_for_kid"
1517        );
1518    }
1519
1520    #[tokio::test]
1521    async fn list_valid_pubkeys_returns_active_first_then_grace() {
1522        let dir = tempdir().unwrap();
1523        let path = dir.path().join("cluster_signing.key");
1524        let original = ClusterSigner::load_or_generate(&path).await.unwrap();
1525        let original_kid = original.key_id();
1526
1527        let result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1528            .await
1529            .unwrap();
1530        assert_eq!(result.previous_kid, original_kid);
1531
1532        let list = list_valid_pubkeys(&path).await.unwrap();
1533        assert_eq!(list.len(), 2);
1534        assert_eq!(list[0].status, PubkeyStatus::Active);
1535        assert_eq!(list[0].kid, result.new_active_kid);
1536        assert!(list[0].valid_until.is_none());
1537        assert_eq!(list[1].status, PubkeyStatus::Grace);
1538        assert_eq!(list[1].kid, original_kid);
1539        assert!(list[1].valid_until.is_some());
1540    }
1541
1542    #[tokio::test]
1543    async fn list_valid_pubkeys_omits_expired_grace() {
1544        let dir = tempdir().unwrap();
1545        let path = dir.path().join("cluster_signing.key");
1546        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1547
1548        let result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1549            .await
1550            .unwrap();
1551
1552        // Rewind grace into the past — the listing must drop it.
1553        let mut store = read_store(&path).await;
1554        store.retired_grace_until.insert(
1555            result.previous_kid.clone(),
1556            Utc::now() - chrono::Duration::seconds(1),
1557        );
1558        ClusterSigner::write_keystore(&path, &store).await.unwrap();
1559
1560        let list = list_valid_pubkeys(&path).await.unwrap();
1561        assert_eq!(list.len(), 1, "expired-grace entry should be omitted");
1562        assert_eq!(list[0].kid, result.new_active_kid);
1563        assert_eq!(list[0].status, PubkeyStatus::Active);
1564    }
1565
1566    #[tokio::test]
1567    async fn prune_expired_grace_removes_expired_entries() {
1568        let dir = tempdir().unwrap();
1569        let path = dir.path().join("cluster_signing.key");
1570        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1571
1572        let result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1573            .await
1574            .unwrap();
1575
1576        // Force the grace to be in the past.
1577        let mut store = read_store(&path).await;
1578        store.retired_grace_until.insert(
1579            result.previous_kid.clone(),
1580            Utc::now() - chrono::Duration::seconds(1),
1581        );
1582        ClusterSigner::write_keystore(&path, &store).await.unwrap();
1583
1584        let pruned = prune_expired_grace(&path).await.unwrap();
1585        assert_eq!(pruned, 1);
1586
1587        let after = read_store(&path).await;
1588        assert_eq!(after.keys.len(), 1);
1589        assert_eq!(after.keys[0].id, result.new_active_kid);
1590        assert!(after.retired_grace_until.is_empty());
1591    }
1592
1593    #[tokio::test]
1594    async fn prune_expired_grace_is_idempotent() {
1595        let dir = tempdir().unwrap();
1596        let path = dir.path().join("cluster_signing.key");
1597        let _ = ClusterSigner::load_or_generate(&path).await.unwrap();
1598
1599        // No grace entries at all — first call is a no-op, returns 0.
1600        let first = prune_expired_grace(&path).await.unwrap();
1601        assert_eq!(first, 0);
1602        let bytes_after_first = std::fs::read(&path).unwrap();
1603
1604        // Second call must also be 0 and must NOT rewrite the file.
1605        let second = prune_expired_grace(&path).await.unwrap();
1606        assert_eq!(second, 0);
1607        let bytes_after_second = std::fs::read(&path).unwrap();
1608        assert_eq!(
1609            bytes_after_first, bytes_after_second,
1610            "idempotent prune must not rewrite the keystore on a no-op"
1611        );
1612
1613        // Now add an expired grace and prune.
1614        let result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1615            .await
1616            .unwrap();
1617        let mut store = read_store(&path).await;
1618        store.retired_grace_until.insert(
1619            result.previous_kid.clone(),
1620            Utc::now() - chrono::Duration::seconds(1),
1621        );
1622        ClusterSigner::write_keystore(&path, &store).await.unwrap();
1623
1624        let count = prune_expired_grace(&path).await.unwrap();
1625        assert_eq!(count, 1);
1626
1627        // Second prune after the cleanup must again be a no-op.
1628        let count_again = prune_expired_grace(&path).await.unwrap();
1629        assert_eq!(count_again, 0);
1630    }
1631
1632    #[tokio::test]
1633    async fn rotate_then_load_signer_for_old_kid_still_works_within_grace() {
1634        use ed25519_dalek::Verifier;
1635
1636        let dir = tempdir().unwrap();
1637        let path = dir.path().join("cluster_signing.key");
1638        let original = ClusterSigner::load_or_generate(&path).await.unwrap();
1639        let original_kid = original.key_id();
1640
1641        // Sign a message with the original key.
1642        let msg = b"join-token-payload";
1643        let sig_bytes = original.sign(msg);
1644
1645        // Rotate the keystore.
1646        let _result = rotate_keystore(&path, std::time::Duration::from_secs(3600))
1647            .await
1648            .unwrap();
1649
1650        // The old signer should still be loadable within grace.
1651        let loaded = load_signer_for_kid(&path, &original_kid)
1652            .await
1653            .unwrap()
1654            .expect("old kid should still load while in grace");
1655
1656        // And its verifying key should still verify the original signature.
1657        let sig = Signature::from_bytes(&sig_bytes);
1658        loaded
1659            .verifying_key()
1660            .verify(msg, &sig)
1661            .expect("signature from pre-rotation key must verify against in-grace key");
1662    }
1663
1664    // -------------------------------------------------------------------
1665    // Wave 8 — SigningBackend trait + FileBackend adapter.
1666    // -------------------------------------------------------------------
1667
1668    #[tokio::test]
1669    async fn file_backend_round_trips_through_trait() {
1670        let dir = tempfile::tempdir().unwrap();
1671        let path = dir.path().join("ks.json");
1672        let backend: std::sync::Arc<dyn SigningBackend> =
1673            std::sync::Arc::new(FileBackend::new(path.clone()));
1674
1675        let active = backend.active_key_id().await.unwrap();
1676        assert_eq!(active.len(), 8, "kid is 8 hex chars");
1677
1678        let pubkey = backend.public_key_b64(&active).await.unwrap();
1679        assert!(pubkey.is_some(), "active key must resolve via the trait");
1680
1681        // Sign-then-verify-by-hand round trip.
1682        let msg = b"hello signing backend";
1683        let sig = backend.sign(&active, msg).await.unwrap();
1684        let pubkey_bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
1685            .decode(pubkey.unwrap())
1686            .unwrap();
1687        let verifying =
1688            ed25519_dalek::VerifyingKey::from_bytes(&pubkey_bytes.try_into().unwrap()).unwrap();
1689        let signature = ed25519_dalek::Signature::from_bytes(&sig);
1690        verifying
1691            .verify_strict(msg, &signature)
1692            .expect("file-backend signature must verify against its own pubkey");
1693    }
1694
1695    #[tokio::test]
1696    async fn file_backend_reports_software_only() {
1697        let dir = tempfile::tempdir().unwrap();
1698        let backend = FileBackend::new(dir.path().join("ks.json"));
1699        assert_eq!(backend.name(), "file");
1700        assert!(
1701            !backend.is_hardware_backed(),
1702            "file backend must NOT report hardware-backed"
1703        );
1704    }
1705
1706    #[tokio::test]
1707    async fn file_backend_rotate_through_trait_produces_grace_entry() {
1708        let dir = tempfile::tempdir().unwrap();
1709        let path = dir.path().join("ks.json");
1710        let backend = FileBackend::new(path);
1711
1712        // Seed the keystore by reading the initial active key.
1713        let original_kid = backend.active_key_id().await.unwrap();
1714
1715        let result = backend
1716            .rotate(std::time::Duration::from_secs(3600))
1717            .await
1718            .unwrap();
1719        assert_ne!(result.new_active_kid, original_kid);
1720        assert_eq!(result.previous_kid, original_kid);
1721
1722        // Both keys should now appear in the listing.
1723        let infos = backend.list_valid_pubkeys().await.unwrap();
1724        assert_eq!(infos.len(), 2, "active + 1 grace entry after rotation");
1725    }
1726
1727    #[tokio::test]
1728    async fn file_backend_unknown_kid_returns_none() {
1729        let dir = tempfile::tempdir().unwrap();
1730        let backend = FileBackend::new(dir.path().join("ks.json"));
1731        let _ = backend.active_key_id().await.unwrap(); // seed
1732        let unknown = backend.public_key_b64("deadbeef").await.unwrap();
1733        assert!(unknown.is_none(), "unknown kid must resolve to None");
1734    }
1735
1736    // -------------------------------------------------------------------
1737    // Wave 9B — long-lived ClusterCa + CaCert issue/verify.
1738    // -------------------------------------------------------------------
1739
1740    #[tokio::test]
1741    async fn cluster_ca_load_or_generate_round_trip() {
1742        let dir = tempfile::tempdir().unwrap();
1743        let path = dir.path().join("cluster_ca.key");
1744
1745        // First load creates the file.
1746        let ca1 = ClusterCa::load_or_generate(&path).await.unwrap();
1747        let kid1 = ca1.ca_kid();
1748        let pubkey1 = ca1.ca_public_key_b64();
1749        assert_eq!(kid1.len(), 8);
1750        assert!(!pubkey1.is_empty());
1751
1752        // Second load reads back the same key — kid and pubkey are stable.
1753        let ca2 = ClusterCa::load_or_generate(&path).await.unwrap();
1754        assert_eq!(ca2.ca_kid(), kid1);
1755        assert_eq!(ca2.ca_public_key_b64(), pubkey1);
1756
1757        // File is 32 bytes exactly.
1758        let bytes = tokio::fs::read(&path).await.unwrap();
1759        assert_eq!(bytes.len(), 32);
1760    }
1761
1762    #[tokio::test]
1763    async fn cluster_ca_issues_and_verifies_ca_cert() {
1764        use std::time::Duration;
1765
1766        let dir = tempfile::tempdir().unwrap();
1767        let ca_path = dir.path().join("cluster_ca.key");
1768        let ca = ClusterCa::load_or_generate(&ca_path).await.unwrap();
1769
1770        let active_kid = "deadbeef".to_string();
1771        let active_pubkey_b64 = "Y29udGVudG9mYV9ub25fcGtfYi02NF9zaWduZWRfa2V5Xw".to_string();
1772        let cluster_domain = "test-cluster".to_string();
1773
1774        let cert = ca
1775            .issue_ca_cert(
1776                active_kid.clone(),
1777                active_pubkey_b64.clone(),
1778                cluster_domain.clone(),
1779                Duration::from_secs(3600),
1780            )
1781            .unwrap();
1782        assert_eq!(cert.active_kid, active_kid);
1783        assert_eq!(cert.cluster_domain, cluster_domain);
1784        assert_eq!(cert.v, zlayer_types::api::cluster::CA_CERT_FORMAT_VERSION);
1785        assert!(!cert.sig_by_ca.is_empty());
1786
1787        // Verification round-trips against the CA's own pubkey.
1788        ClusterCa::verify_ca_cert(&ca.ca_public_key_b64(), &cert).unwrap();
1789    }
1790
1791    #[tokio::test]
1792    async fn cluster_ca_cert_verification_fails_under_wrong_pubkey() {
1793        use std::time::Duration;
1794
1795        let dir = tempfile::tempdir().unwrap();
1796        let ca = ClusterCa::load_or_generate(&dir.path().join("ca1.key"))
1797            .await
1798            .unwrap();
1799        let other = ClusterCa::load_or_generate(&dir.path().join("ca2.key"))
1800            .await
1801            .unwrap();
1802
1803        let cert = ca
1804            .issue_ca_cert(
1805                "abcd1234".into(),
1806                "ignored-for-this-test-xx".into(),
1807                "test-cluster".into(),
1808                Duration::from_secs(3600),
1809            )
1810            .unwrap();
1811
1812        // Wrong CA pubkey must reject.
1813        let err = ClusterCa::verify_ca_cert(&other.ca_public_key_b64(), &cert).unwrap_err();
1814        let msg = err.to_string();
1815        assert!(
1816            msg.contains("verification failed") || msg.contains("signature"),
1817            "expected sig-verification error; got {msg}"
1818        );
1819    }
1820
1821    #[tokio::test]
1822    async fn cluster_ca_cert_verification_fails_when_expired() {
1823        use std::time::Duration;
1824
1825        let dir = tempfile::tempdir().unwrap();
1826        let ca = ClusterCa::load_or_generate(&dir.path().join("ca.key"))
1827            .await
1828            .unwrap();
1829
1830        // Issue a cert with negligible grace; it expires before we can verify.
1831        let cert = ca
1832            .issue_ca_cert(
1833                "abcd1234".into(),
1834                "ignored-for-this-test-xx".into(),
1835                "test".into(),
1836                Duration::from_millis(1),
1837            )
1838            .unwrap();
1839        tokio::time::sleep(Duration::from_millis(50)).await;
1840
1841        let err = ClusterCa::verify_ca_cert(&ca.ca_public_key_b64(), &cert).unwrap_err();
1842        assert!(
1843            err.to_string().contains("expired"),
1844            "expected expired-cert error; got {err}"
1845        );
1846    }
1847}