Skip to main content

net/adapter/net/identity/
token.rs

1//! Permission tokens for Net authorization.
2//!
3//! Tokens are ed25519-signed, delegatable, and expirable. They authorize
4//! an entity to perform specific actions (publish, subscribe, admin) on
5//! specific channels. L2 (Channels & Authorization) enforces these at
6//! subscription time, not per-packet.
7
8use dashmap::DashMap;
9use ed25519_dalek::Signature;
10use std::sync::atomic::{AtomicBool, Ordering as AtomicOrdering};
11use std::sync::Arc;
12use std::time::{SystemTime, UNIX_EPOCH};
13
14use super::entity::{EntityId, EntityKeypair};
15use crate::adapter::net::channel::ChannelHash;
16
/// Bitfield describing which actions a [`PermissionToken`] grants.
///
/// Each capability occupies one bit of the wrapped `u32`; the named
/// constants on this type (`PUBLISH`, `SUBSCRIBE`, …) are the bits this
/// module assigns meaning to. The raw value is what is written into the
/// token's 4-byte `scope` wire field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenScope {
    /// Raw capability bits.
    bits: u32,
}
22
23impl TokenScope {
24    /// No permissions.
25    pub const NONE: Self = Self { bits: 0 };
26    /// Publish events to a channel.
27    pub const PUBLISH: Self = Self { bits: 0b0001 };
28    /// Subscribe to events from a channel.
29    pub const SUBSCRIBE: Self = Self { bits: 0b0010 };
30    /// Administrative access (create/delete channels, manage tokens).
31    pub const ADMIN: Self = Self { bits: 0b0100 };
32    /// Can delegate this token to other entities.
33    pub const DELEGATE: Self = Self { bits: 0b1000 };
34    /// Wildcard over channels: authorizes the token's actions on *every*
35    /// channel, regardless of the token's `channel_hash` field. Must be
36    /// set explicitly by the issuer — the previous "`channel_hash == 0`
37    /// means wildcard" overload is no longer honored, so a legitimate
38    /// channel whose xxh3-truncated [`ChannelHash`] happens to hash to 0
39    /// cannot accidentally be authorized as a universal grant.
40    pub const WILDCARD: Self = Self { bits: 0b1_0000 };
41    /// Full access (all actions on a single channel). Does NOT include
42    /// [`Self::WILDCARD`] — callers that want cross-channel access
43    /// must opt in explicitly.
44    pub const ALL: Self = Self { bits: 0b1111 };
45
46    /// Create a scope from raw bits.
47    #[inline]
48    pub const fn from_bits(bits: u32) -> Self {
49        Self { bits }
50    }
51
52    /// Get the raw bits.
53    #[inline]
54    pub const fn bits(self) -> u32 {
55        self.bits
56    }
57
58    /// Check if this scope includes another.
59    ///
60    /// A scope never "contains" `NONE`: the bit-mask identity
61    /// `(self.bits & 0) == 0` would otherwise return true for every
62    /// token, so a caller that builds `action: TokenScope` from
63    /// external input — e.g. a wire `u32` masked into a smaller
64    /// subset — that happens to mask to `NONE` would receive a
65    /// blanket `true` against any token. Short-circuit `NONE` so the
66    /// caller's "do they have permission X" question rejects the
67    /// no-op action.
68    #[inline]
69    pub const fn contains(self, other: Self) -> bool {
70        if other.bits == 0 {
71            return false;
72        }
73        (self.bits & other.bits) == other.bits
74    }
75
76    /// Restrict this scope to only include permissions in `other`.
77    #[inline]
78    pub const fn intersect(self, other: Self) -> Self {
79        Self {
80            bits: self.bits & other.bits,
81        }
82    }
83
84    /// Combine with another scope.
85    #[inline]
86    pub const fn union(self, other: Self) -> Self {
87        Self {
88            bits: self.bits | other.bits,
89        }
90    }
91
92    /// Optional channel hash filter. If set, token only applies to
93    /// channels matching this canonical [`ChannelHash`].
94    pub fn with_channel(self, channel_hash: ChannelHash) -> ScopedToken {
95        ScopedToken {
96            scope: self,
97            channel_hash: Some(channel_hash),
98        }
99    }
100}
101
102/// A scope bound to an optional channel.
103#[derive(Debug, Clone, Copy)]
104pub struct ScopedToken {
105    pub scope: TokenScope,
106    pub channel_hash: Option<ChannelHash>,
107}
108
109/// A signed, delegatable permission token.
110///
111/// Wire format (169 bytes):
112/// ```text
113/// issuer:             32 bytes (EntityId)
114/// subject:            32 bytes (EntityId)
115/// scope:               4 bytes (u32)
116/// channel_hash:        8 bytes (ChannelHash, u64; combine with WILDCARD scope for "all channels")
117/// issuer_generation:   4 bytes (u32; floor below which the issuer revokes outstanding tokens)
118/// not_before:          8 bytes (u64 unix timestamp)
119/// not_after:           8 bytes (u64 unix timestamp)
120/// delegation_depth:    1 byte  (u8)
121/// nonce:               8 bytes (u64)
122/// --- signed above ---
123/// signature:          64 bytes (ed25519)
124/// ```
125///
126/// `issuer_generation` participates in revocation: an issuer that
127/// wants to invalidate every outstanding token (including delegated
128/// children) bumps its floor in the [`RevocationRegistry`]; the
129/// cache rejects any token whose generation is below the current
130/// floor. Children inherit their parent's generation at delegation
131/// time, so revoking a parent transitively revokes its descendants
132/// without a parent-chain walk.
133#[derive(Clone)]
134pub struct PermissionToken {
135    /// Who issued this token.
136    pub issuer: EntityId,
137    /// Who this token authorizes.
138    pub subject: EntityId,
139    /// What actions are permitted.
140    pub scope: TokenScope,
141    /// Channel restriction (canonical [`ChannelHash`]; combine with
142    /// [`TokenScope::WILDCARD`] for cross-channel grants).
143    pub channel_hash: ChannelHash,
144    /// Issuer-rotation floor. Tokens with `issuer_generation < current
145    /// floor` in the [`RevocationRegistry`] are rejected by
146    /// [`TokenCache::check`]; bumping the floor invalidates every
147    /// outstanding token from that issuer (including delegated
148    /// children, which inherit the value from their parent).
149    pub issuer_generation: u32,
150    /// Valid from (unix timestamp seconds).
151    pub not_before: u64,
152    /// Valid until (unix timestamp seconds).
153    pub not_after: u64,
154    /// How many times this token can be re-delegated.
155    pub delegation_depth: u8,
156    /// Unique nonce for revocation.
157    pub nonce: u64,
158    /// Ed25519 signature over all preceding fields.
159    pub signature: [u8; 64],
160}
161
162impl PermissionToken {
163    /// Size of the signed payload (everything before the signature).
164    const SIGNED_PAYLOAD_SIZE: usize = 32 + 32 + 4 + 8 + 4 + 8 + 8 + 1 + 8; // 105 bytes
165
166    /// Total serialized size.
167    pub const WIRE_SIZE: usize = Self::SIGNED_PAYLOAD_SIZE + 64; // 169 bytes
168
169    /// Issue a new token.
170    ///
171    /// `duration_secs` is clamped: a value that would overflow
172    /// `now + duration_secs` saturates `not_after` at `u64::MAX`,
173    /// producing a functionally-never-expiring token rather than
174    /// wrapping the timestamp or panicking. Callers who want to
175    /// reject pathological TTLs should range-check at the SDK
176    /// layer.
177    ///
178    /// **Panics** if `issuer_keypair` is public-only (the migration-
179    /// source path zeroizes its keypair after `ActivateAck`, leaving
180    /// such a keypair). FFI callers and any path that may receive a
181    /// public-only keypair must use [`Self::try_issue`] instead;
182    /// `issue` is preserved as a convenience wrapper for callers
183    /// (notably tests) that own a freshly-generated keypair and
184    /// know it has its signing half.
185    pub fn issue(
186        issuer_keypair: &EntityKeypair,
187        subject: EntityId,
188        scope: TokenScope,
189        channel_hash: ChannelHash,
190        duration_secs: u64,
191        delegation_depth: u8,
192    ) -> Self {
193        // Match each `try_issue` failure to a precise panic message.
194        // A blanket `.expect("...public-only keypair...")` would
195        // mis-blame any future variant (today: `ZeroTtl`) on the
196        // ReadOnly path, leading whoever sees the panic to start
197        // chasing a key-loading bug for what is actually a
198        // `duration_secs == 0` callsite.
199        match Self::try_issue(
200            issuer_keypair,
201            subject,
202            scope,
203            channel_hash,
204            duration_secs,
205            delegation_depth,
206        ) {
207            Ok(token) => token,
208            Err(TokenError::ReadOnly) => {
209                panic!("PermissionToken::issue called with a public-only keypair — use try_issue")
210            }
211            Err(TokenError::ZeroTtl) => {
212                panic!("PermissionToken::issue called with duration_secs == 0 — use try_issue")
213            }
214            Err(TokenError::TtlTooLong) => {
215                panic!(
216                    "PermissionToken::issue called with duration_secs > MAX_TOKEN_TTL_SECS \
217                     ({MAX_TOKEN_TTL_SECS}s) — use try_issue"
218                )
219            }
220            Err(e) => panic!("PermissionToken::issue failed: {e:?} — use try_issue"),
221        }
222    }
223
224    /// Fallible counterpart to [`Self::issue`]: returns
225    /// [`TokenError::ReadOnly`] when the issuer keypair lacks its
226    /// signing half (post-migration / public-only keypair) instead
227    /// of panicking. The FFI bindings route through this function
228    /// so a panic doesn't unwind across `extern "C"` into
229    /// C/Go-cgo/NAPI/PyO3 callers — undefined behaviour.
230    pub fn try_issue(
231        issuer_keypair: &EntityKeypair,
232        subject: EntityId,
233        scope: TokenScope,
234        channel_hash: ChannelHash,
235        duration_secs: u64,
236        delegation_depth: u8,
237    ) -> Result<Self, TokenError> {
238        // A TTL of 0 produces a token with
239        // `not_after == not_before`. The signature verifies but
240        // `is_valid()` rejects it as `Expired` immediately
241        // (`is_valid` uses strict `now >= not_after`, so a token
242        // with `not_after == now` is born expired). The caller
243        // mints something unusable with no diagnostic. Reject at
244        // issue time so the bug surfaces as a typed error rather
245        // than a silent "every check fails on the receiver".
246        if duration_secs == 0 {
247            return Err(TokenError::ZeroTtl);
248        }
249        // Reject TTLs past the hard ceiling. An unbounded TTL (up to
250        // `u64::MAX`) saturates `not_after` into a never-expiring
251        // token that can only be retired via the advisory revocation
252        // floor — see [`MAX_TOKEN_TTL_SECS`]. Reject at issue time so
253        // the misuse surfaces as a typed error instead of an
254        // effectively immortal credential.
255        if duration_secs > MAX_TOKEN_TTL_SECS {
256            return Err(TokenError::TtlTooLong);
257        }
258        let now = current_timestamp();
259        // Abort on `getrandom` failure rather than
260        // panic-unwinding through the FFI boundary. Token nonces
261        // need uniqueness (replay-distinct re-issues), and a
262        // predictable nonce + signed payload would let an attacker
263        // re-mint identical-looking tokens — termination is the
264        // only safe response.
265        let mut nonce_bytes = [0u8; 8];
266        if let Err(e) = getrandom::fill(&mut nonce_bytes) {
267            eprintln!(
268                "FATAL: PermissionToken nonce getrandom failure ({e:?}); aborting to avoid predictable token nonce"
269            );
270            std::process::abort();
271        }
272        let nonce = u64::from_le_bytes(nonce_bytes);
273
274        let mut token = Self {
275            issuer: issuer_keypair.entity_id().clone(),
276            subject,
277            scope,
278            channel_hash,
279            // Default to generation 0. Callers that maintain a
280            // RevocationRegistry can mint a token bound to a specific
281            // generation via direct struct construction, or rotate
282            // by bumping their floor and re-issuing with a higher
283            // value — see `try_issue_with_generation`.
284            issuer_generation: 0,
285            not_before: now,
286            not_after: now.saturating_add(duration_secs),
287            delegation_depth,
288            nonce,
289            signature: [0u8; 64],
290        };
291
292        let payload = token.signed_payload();
293        // Use `try_sign` to surface a public-only keypair as
294        // `TokenError::ReadOnly` instead of panicking.
295        let sig = issuer_keypair
296            .try_sign(&payload)
297            .map_err(|_| TokenError::ReadOnly)?;
298        token.signature = sig.to_bytes();
299        Ok(token)
300    }
301
302    /// Verify the token's signature against the issuer's public key.
303    pub fn verify(&self) -> Result<(), TokenError> {
304        let payload = self.signed_payload();
305        let sig = Signature::from_bytes(&self.signature);
306        self.issuer
307            .verify(&payload, &sig)
308            .map_err(|_| TokenError::InvalidSignature)
309    }
310
311    /// Check if the token is currently valid (signature + time bounds).
312    ///
313    /// Both bounds are **inclusive-expiry**: the token is live while
314    /// `not_before <= now < not_after`. At `now == not_after` the
315    /// token is already expired. The cache sweep
316    /// (`TokenCache::evict_expired`) has always used this convention
317    /// (`retain(|t| t.not_after > now)` drops boundary entries);
318    /// the earlier `is_valid` / `is_expired` wording accidentally
319    /// treated `not_after` as the last valid second, giving every
320    /// token a one-second bonus over what the sweep believed.
321    /// Aligning everything on strict "< not_after" removes the
322    /// off-by-one and makes the token lifetime exactly
323    /// `duration_secs` seconds as `issue()` promises.
324    ///
325    pub fn is_valid(&self) -> Result<(), TokenError> {
326        self.is_valid_with_skew(0)
327    }
328
329    /// Same as [`Self::is_valid`] but applies `skew_secs` of clock-
330    /// skew tolerance to both bounds. A token is accepted while
331    /// `now >= not_before - skew` AND `now < not_after + skew`.
332    /// [`TokenCache::check`] uses this via the cache's configured
333    /// `clock_skew_secs` (default 0); direct FFI / UI callers stick
334    /// with [`Self::is_valid`].
335    pub fn is_valid_with_skew(&self, skew_secs: u64) -> Result<(), TokenError> {
336        self.verify()?;
337        let now = current_timestamp();
338        // Lower bound: accept tokens whose `not_before` is up to
339        // `skew_secs` in our future. `saturating_sub` pins the
340        // comparison at 0 if a token's `not_before` is smaller
341        // than the tolerance (issuer set `not_before` very early
342        // or used 0); without saturating, the subtraction would
343        // underflow on u64.
344        if now < self.not_before.saturating_sub(skew_secs) {
345            return Err(TokenError::NotYetValid);
346        }
347        // Upper bound: reject only when wall-clock exceeds
348        // `not_after + skew_secs`. `saturating_add` clamps to
349        // u64::MAX (issuer-saturated TTL stays forever-valid).
350        if now >= self.not_after.saturating_add(skew_secs) {
351            return Err(TokenError::Expired);
352        }
353        Ok(())
354    }
355
356    /// Pure time-bound check: `true` iff the host wall-clock has
357    /// reached `not_after`. Deliberately **does not** touch the
358    /// signature — callers wanting end-to-end validity use
359    /// [`Self::is_valid`], and signature integrity alone is
360    /// [`Self::verify`]. This separation matters because a
361    /// tampered-but-expired token is still expired, and every
362    /// binding's `token_is_expired` helper documents itself as a
363    /// pure time check.
364    ///
365    /// Boundary: `now == not_after` ⇒ expired (matches
366    /// [`Self::is_valid`] and the cache's eviction convention).
367    pub fn is_expired(&self) -> bool {
368        current_timestamp() >= self.not_after
369    }
370
371    /// Check if this token authorizes a specific action on a channel.
372    ///
373    /// Returns `true` iff the token's `scope` contains the requested
374    /// `action` AND either:
375    ///
376    /// - the token has the [`TokenScope::WILDCARD`] bit set (authorized
377    ///   on every channel regardless of `channel_hash`), OR
378    /// - the token's `channel_hash` matches the supplied `channel`.
379    ///
380    /// The previous convention — `channel_hash == 0` meaning "wildcard,
381    /// all channels" — is no longer honored. A legitimate channel
382    /// whose xxh3-truncated [`ChannelHash`] hashes to 0 would otherwise
383    /// accidentally turn a narrowly-scoped token into a universal
384    /// grant, which an attacker able to register channel names could
385    /// brute-force since xxh3 is non-cryptographic.
386    pub fn authorizes(&self, action: TokenScope, channel: ChannelHash) -> bool {
387        if !self.scope.contains(action) {
388            return false;
389        }
390        if self.scope.contains(TokenScope::WILDCARD) {
391            return true;
392        }
393        self.channel_hash == channel
394    }
395
396    /// Delegate this token to another entity with restricted scope.
397    ///
398    /// Returns `None` if delegation is not allowed (depth exhausted or
399    /// DELEGATE not in scope).
400    ///
401    /// The child's `not_after` is copied from the parent verbatim,
402    /// NOT derived from `parent.not_after - now`. The subtract-then-
403    /// re-read-clock approach lost multiple seconds of validity
404    /// when the parent was near expiry — the child's `issue()` call
405    /// re-reads `current_timestamp()` and computes
406    /// `now + (parent.not_after - previous_now)`, which rounds down
407    /// by the wall-clock delta between the two reads. Copying
408    /// `not_after` avoids the double-read and guarantees the
409    /// child's lifetime is `parent.not_after - child.not_before`
410    /// exactly.
411    pub fn delegate(
412        &self,
413        signer: &EntityKeypair,
414        new_subject: EntityId,
415        restricted_scope: TokenScope,
416    ) -> Result<Self, TokenError> {
417        // Validate the parent token first
418        self.is_valid()?;
419
420        // Check delegation is allowed
421        if self.delegation_depth == 0 {
422            return Err(TokenError::DelegationExhausted);
423        }
424        if !self.scope.contains(TokenScope::DELEGATE) {
425            return Err(TokenError::DelegationNotAllowed);
426        }
427        // Verify the signer is the subject of this token
428        if signer.entity_id() != &self.subject {
429            return Err(TokenError::NotAuthorized);
430        }
431
432        // New scope is intersection of current scope and requested scope
433        let new_scope = self.scope.intersect(restricted_scope);
434
435        // Issue a child whose `not_after` matches the parent's.
436        // `issue()` stamps `not_before = now`, so the child's
437        // effective lifetime is `parent.not_after - now` — the
438        // same quantity as before, but computed against a single
439        // clock read instead of two. Avoids the near-zero-lifetime
440        // bug when the parent is near expiry.
441        let now = current_timestamp();
442        // Abort on `getrandom` failure rather than
443        // panic-unwinding through the FFI boundary. Token nonces
444        // need uniqueness (replay-distinct re-issues), and a
445        // predictable nonce + signed payload would let an attacker
446        // re-mint identical-looking tokens — termination is the
447        // only safe response.
448        let mut nonce_bytes = [0u8; 8];
449        if let Err(e) = getrandom::fill(&mut nonce_bytes) {
450            eprintln!(
451                "FATAL: PermissionToken nonce getrandom failure ({e:?}); aborting to avoid predictable token nonce"
452            );
453            std::process::abort();
454        }
455        let nonce = u64::from_le_bytes(nonce_bytes);
456
457        let mut child = Self {
458            issuer: signer.entity_id().clone(),
459            subject: new_subject,
460            scope: new_scope,
461            channel_hash: self.channel_hash,
462            // Children inherit the parent's issuer_generation. When the
463            // signer's floor is bumped in the RevocationRegistry, every
464            // outstanding token from that issuer — including this
465            // child — falls below the floor and TokenCache::check
466            // rejects them. That makes a single floor bump transitively
467            // invalidate the chain without a per-link revocation walk.
468            issuer_generation: self.issuer_generation,
469            not_before: now,
470            not_after: self.not_after,
471            delegation_depth: self.delegation_depth - 1,
472            nonce,
473            signature: [0u8; 64],
474        };
475        let payload = child.signed_payload();
476        // Use `try_sign` so a public-only `signer` (post-migration
477        // zeroize) surfaces as `TokenError::ReadOnly` instead of
478        // panicking — same shape as `try_issue`.
479        // The `delegate` signature already returns
480        // `Result<Self, TokenError>`, so callers naturally observe
481        // the new variant without an API change.
482        let sig = signer
483            .try_sign(&payload)
484            .map_err(|_| TokenError::ReadOnly)?;
485        child.signature = sig.to_bytes();
486        Ok(child)
487    }
488
489    /// Serialize the fields that are covered by the signature into
490    /// a fixed-size stack buffer. The struct's signed-payload size
491    /// is a compile-time constant (97 bytes), so we don't need a
492    /// heap allocation per verify — the previous `Vec::with_capacity`
493    /// allocated and freed bytes on every signature check, which
494    /// is the hottest path on every authenticated mesh packet.
495    /// Returning `[u8; SIGNED_PAYLOAD_SIZE]` keeps the layout
496    /// identical to the heap version (the existing callers'
497    /// `&payload` still resolves to a `&[u8]`).
498    /// Materialise the canonical byte payload the signature covers
499    /// (every field except the signature itself). `pub(crate)` so
500    /// only in-crate mint / verify paths can construct a transcript
501    /// for an arbitrary token; a `pub` surface would let any caller
502    /// holding a private key produce signed bytes the API otherwise
503    /// only ships via [`Self::issue`] / [`Self::try_issue`] /
504    /// [`Self::delegate`]. Test harnesses and key-rotation flows
505    /// stay in-crate (or use a `pub` wrapper that enforces invariants).
506    pub(crate) fn signed_payload(&self) -> [u8; Self::SIGNED_PAYLOAD_SIZE] {
507        let mut buf = [0u8; Self::SIGNED_PAYLOAD_SIZE];
508        let mut off = 0;
509        buf[off..off + 32].copy_from_slice(self.issuer.as_bytes());
510        off += 32;
511        buf[off..off + 32].copy_from_slice(self.subject.as_bytes());
512        off += 32;
513        buf[off..off + 4].copy_from_slice(&self.scope.bits().to_le_bytes());
514        off += 4;
515        // 8 bytes for channel_hash (u64) — see WIRE_SIZE comment.
516        buf[off..off + 8].copy_from_slice(&self.channel_hash.to_le_bytes());
517        off += 8;
518        // 4 bytes for issuer_generation (revocation floor).
519        buf[off..off + 4].copy_from_slice(&self.issuer_generation.to_le_bytes());
520        off += 4;
521        buf[off..off + 8].copy_from_slice(&self.not_before.to_le_bytes());
522        off += 8;
523        buf[off..off + 8].copy_from_slice(&self.not_after.to_le_bytes());
524        off += 8;
525        buf[off] = self.delegation_depth;
526        off += 1;
527        buf[off..off + 8].copy_from_slice(&self.nonce.to_le_bytes());
528        buf
529    }
530
531    /// Serialize to wire format.
532    pub fn to_bytes(&self) -> Vec<u8> {
533        let mut buf = Vec::with_capacity(Self::WIRE_SIZE);
534        buf.extend_from_slice(&self.signed_payload());
535        buf.extend_from_slice(&self.signature);
536        buf
537    }
538
539    /// Deserialize from wire format.
540    ///
541    /// Rejects buffers whose length is anything other than exactly
542    /// [`Self::WIRE_SIZE`]. Previously this method only guarded the
543    /// lower bound, silently accepting concatenated or trailing-
544    /// garbage payloads — which weakened the wire-format contract
545    /// and let malformed blobs parse as valid tokens. Callers
546    /// framing tokens inside a larger message must slice to exactly
547    /// `WIRE_SIZE` before calling this.
548    #[expect(
549        clippy::unwrap_used,
550        reason = "data.len() == WIRE_SIZE checked above; fixed-offset slices into the buffer convert infallibly to fixed-size arrays"
551    )]
552    pub fn from_bytes(data: &[u8]) -> Result<Self, TokenError> {
553        if data.len() != Self::WIRE_SIZE {
554            return Err(TokenError::InvalidFormat);
555        }
556
557        // Offsets reflect channel_hash (8 bytes) at byte 68 and
558        // issuer_generation (4 bytes) at byte 76.
559        let issuer = EntityId::from_bytes(data[0..32].try_into().unwrap());
560        let subject = EntityId::from_bytes(data[32..64].try_into().unwrap());
561        let scope = TokenScope::from_bits(u32::from_le_bytes(data[64..68].try_into().unwrap()));
562        let channel_hash = ChannelHash::from_le_bytes(data[68..76].try_into().unwrap());
563        let issuer_generation = u32::from_le_bytes(data[76..80].try_into().unwrap());
564        let not_before = u64::from_le_bytes(data[80..88].try_into().unwrap());
565        let not_after = u64::from_le_bytes(data[88..96].try_into().unwrap());
566        let delegation_depth = data[96];
567        let nonce = u64::from_le_bytes(data[97..105].try_into().unwrap());
568        let mut signature = [0u8; 64];
569        signature.copy_from_slice(&data[105..169]);
570
571        Ok(Self {
572            issuer,
573            subject,
574            scope,
575            channel_hash,
576            issuer_generation,
577            not_before,
578            not_after,
579            delegation_depth,
580            nonce,
581            signature,
582        })
583    }
584}
585
586impl std::fmt::Debug for PermissionToken {
587    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
588        f.debug_struct("PermissionToken")
589            .field("issuer", &self.issuer)
590            .field("subject", &self.subject)
591            .field("scope", &format!("{:04b}", self.scope.bits()))
592            .field("channel_hash", &format!("{:08x}", self.channel_hash))
593            .field("delegation_depth", &self.delegation_depth)
594            .field("nonce", &self.nonce)
595            .finish()
596    }
597}
598
/// Upper limit on how many `(subject, channel_hash)` slots a
/// [`TokenCache`] will hold (2^16).
///
/// The figure sits far above realistic deployments — a node tracking
/// 65 K distinct subject/channel pairs is already an outlier — and
/// exists to bound memory under a peer-driven flood (BUG #146). Before
/// the cap, `insert` / `insert_unchecked` accepted unlimited new keys,
/// so a peer issuing or replaying signed tokens could grow the cache
/// linearly in `(subject × channel)` cardinality.
///
/// Only *new* slot keys are refused at the cap; existing entries can
/// always be refreshed. `evict_expired` frees slots as tokens lapse, so
/// admission resumes once pressure eases.
pub const MAX_TOKEN_SLOTS: usize = 1 << 16;
610
/// Upper limit on how many tokens with distinct scopes may share one
/// `(subject, channel_hash)` slot.
///
/// `TokenScope` is a `u32` bitfield, so 2^32 different values are
/// possible in principle. Real issuers combine a handful of flags
/// ({PUBLISH, SUBSCRIBE, ADMIN, DELEGATE, WILDCARD}), which makes 32
/// comfortably more than actual usage while still bounding growth
/// inside a slot.
pub const MAX_TOKENS_PER_SLOT: usize = 1 << 5;
618
/// Suggested clock-skew tolerance, in seconds, for a production
/// [`TokenCache`].
///
/// Pass it to [`TokenCache::with_clock_skew`] when a node's wall clock
/// may drift relative to the rest of the mesh. NTP-synced fleets
/// usually agree to within seconds, but containerized edge deployments
/// routinely drift by tens of seconds and would otherwise reject
/// freshly issued tokens or keep honouring ones their peers already
/// consider expired.
///
/// This is a recommendation, not the default: `TokenCache::new` stays
/// **strict** (skew = 0) to preserve the existing expiry contract, and
/// operators opt in with
/// `TokenCache::with_clock_skew(TOKEN_CLOCK_SKEW_SECS_RECOMMENDED)`.
///
/// The tolerance only takes effect through [`TokenCache::check`];
/// [`PermissionToken::is_valid`] and [`PermissionToken::is_expired`]
/// remain strict wall-clock checks for FFI / UI callers.
pub const TOKEN_CLOCK_SKEW_SECS_RECOMMENDED: u64 = {
    const ONE_MINUTE_SECS: u64 = 60;
    ONE_MINUTE_SECS
};
636
/// Longest TTL, in seconds, a freshly issued token may request: one
/// 365-day year.
///
/// `try_issue` answers any larger `duration_secs` with
/// [`TokenError::TtlTooLong`]. Without the ceiling a caller could ask
/// for `u64::MAX`, whose `not_after` saturates and never expires.
/// Revocation is only the advisory per-issuer `RevocationRegistry`
/// floor, so a leaked immortal credential would be practically
/// impossible to retire on a node that never learns to raise the
/// floor.
///
/// Capping the window forces long-lived grants to be re-issued
/// periodically (re-checking the issuer's signing key and current
/// policy) and limits the blast radius of any one leaked token.
/// `delegate` copies the parent's `not_after`, so delegation chains
/// stay inside the same bound.
pub const MAX_TOKEN_TTL_SECS: u64 = {
    const SECS_PER_DAY: u64 = 24 * 60 * 60;
    365 * SECS_PER_DAY
};
652
/// Largest clock-skew tolerance, in seconds, a [`TokenCache`] will
/// apply: five minutes.
///
/// [`TokenCache::with_clock_skew`] and [`TokenCache::set_clock_skew`]
/// clamp anything larger down to this value. Skew widens every token's
/// validity window on both sides, so an unbounded setting would keep
/// expired tokens acceptable for that many extra seconds across the
/// entire cache.
///
/// Five minutes comfortably covers real NTP / container drift (the
/// recommended setting, [`TOKEN_CLOCK_SKEW_SECS_RECOMMENDED`], is 60 s)
/// while stopping a misconfiguration from reducing the expiry check to
/// a rubber stamp.
pub const MAX_TOKEN_CLOCK_SKEW_SECS: u64 = {
    const SECS_PER_MINUTE: u64 = 60;
    5 * SECS_PER_MINUTE
};
665
/// Fast permission lookup cache.
///
/// Keyed by `(subject EntityId bytes, channel_hash)`. Each slot holds
/// a **list** of tokens — previous versions kept a single token per
/// slot, which silently dropped tokens when the same subject needed
/// multiple distinct scopes on the same channel (e.g. one PUBLISH
/// token and one SUBSCRIBE token). On insert the incoming token
/// replaces any existing entry with an **identical scope bitfield**
/// so a refresh doesn't stack duplicates, but tokens with different
/// scopes coexist.
///
/// Entries are not evicted automatically — callers should check
/// `is_valid()` on retrieved tokens, or call [`Self::evict_expired`]
/// on a cadence.
///
/// Capacity is bounded by [`MAX_TOKEN_SLOTS`] (slot count) and
/// [`MAX_TOKENS_PER_SLOT`] (tokens-with-distinct-scope per slot).
pub struct TokenCache {
    /// Token storage: `(subject id bytes, slot channel)` -> tokens in
    /// that slot. WILDCARD-scoped tokens are filed under slot channel
    /// `0` regardless of their own `channel_hash`.
    tokens: DashMap<([u8; 32], ChannelHash), Vec<PermissionToken>>,
    /// Per-issuer revocation floors. A token whose
    /// `issuer_generation` is strictly below the floor stored here
    /// for its issuer is rejected by [`Self::check`]. [`Self::new`]
    /// and [`Self::with_clock_skew`] give each cache its own fresh
    /// (isolated) registry; callers that need several caches in the
    /// same process to observe the same floors inject a shared
    /// handle via [`Self::with_revocation_registry`].
    revocation: Arc<RevocationRegistry>,
    /// Wall-clock skew tolerance (seconds) applied when `check`
    /// evaluates a token's time bounds. Default 0 (strict). Operators
    /// whose fleet may observe clock drift opt in by constructing the
    /// cache via [`Self::with_clock_skew`] — see
    /// [`TOKEN_CLOCK_SKEW_SECS_RECOMMENDED`] for the recommended
    /// value. Never exceeds [`MAX_TOKEN_CLOCK_SKEW_SECS`]: both
    /// setters clamp.
    clock_skew_secs: u64,
    /// Set the first time a WILDCARD-scoped token is inserted.
    /// [`Self::check`] consults this before falling through to
    /// the wildcard slot — caches that never receive a wildcard
    /// (the common case for most subjects) skip the second
    /// `tokens.get` entirely on every miss. Monotonic: never
    /// cleared; a wildcard later evicted by `evict_expired` just
    /// means the fallback walks an empty slot, which is cheap.
    wildcard_inserted: AtomicBool,
}
709
/// Per-issuer revocation floor. Bumping an issuer's floor invalidates
/// every outstanding token from that issuer — including delegated
/// children — without needing to enumerate them. Issuers ship the
/// floor out-of-band alongside their public key when rotation matters.
///
/// An issuer with no recorded floor is treated as floor 0, so no
/// token is revoked until [`RevocationRegistry::revoke_below`] is
/// called for that issuer.
#[derive(Debug, Default)]
pub struct RevocationRegistry {
    /// Issuer id bytes -> lowest `issuer_generation` still accepted.
    floors: DashMap<[u8; 32], u32>,
}
718
719impl RevocationRegistry {
720    /// Create an empty registry (every issuer's floor is implicitly 0).
721    pub fn new() -> Self {
722        Self::default()
723    }
724
725    /// Set the floor for an issuer. Tokens with
726    /// `issuer_generation < generation` are rejected on the next
727    /// [`TokenCache::check`]. The call is monotonic: bumping with a
728    /// value <= the current floor is a no-op (prevents accidental
729    /// un-revocation).
730    pub fn revoke_below(&self, issuer: &EntityId, generation: u32) {
731        let key = *issuer.as_bytes();
732        // Use entry::and_modify + or_insert so the merge is atomic
733        // against concurrent revoke_below calls on the same issuer.
734        self.floors
735            .entry(key)
736            .and_modify(|cur| {
737                if generation > *cur {
738                    *cur = generation;
739                }
740            })
741            .or_insert(generation);
742    }
743
744    /// Current floor for an issuer (0 if unset).
745    pub fn floor(&self, issuer: &EntityId) -> u32 {
746        self.floors
747            .get(issuer.as_bytes())
748            .map(|r| *r.value())
749            .unwrap_or(0)
750    }
751
752    /// Returns true if `token` is below its issuer's floor.
753    #[inline]
754    pub fn is_revoked(&self, token: &PermissionToken) -> bool {
755        token.issuer_generation < self.floor(&token.issuer)
756    }
757}
758
759impl TokenCache {
760    /// Create an empty token cache with a fresh revocation registry
761    /// and strict (zero) clock-skew tolerance.
762    pub fn new() -> Self {
763        Self {
764            tokens: DashMap::new(),
765            revocation: Arc::new(RevocationRegistry::new()),
766            clock_skew_secs: 0,
767            wildcard_inserted: AtomicBool::new(false),
768        }
769    }
770
771    /// Create an empty token cache with the supplied clock-skew
772    /// tolerance (in seconds) applied to every [`Self::check`]
773    /// time-bound evaluation. See
774    /// [`TOKEN_CLOCK_SKEW_SECS_RECOMMENDED`] for the production-
775    /// recommended value. The tolerance is clamped to
776    /// [`MAX_TOKEN_CLOCK_SKEW_SECS`] so a misconfiguration can't widen
777    /// the validity window without bound.
778    pub fn with_clock_skew(skew_secs: u64) -> Self {
779        Self {
780            tokens: DashMap::new(),
781            revocation: Arc::new(RevocationRegistry::new()),
782            clock_skew_secs: skew_secs.min(MAX_TOKEN_CLOCK_SKEW_SECS),
783            wildcard_inserted: AtomicBool::new(false),
784        }
785    }
786
787    /// Create an empty token cache that shares the supplied
788    /// revocation registry. Use this when several caches in the
789    /// same process must observe the same revocation floors (e.g.
790    /// per-channel caches that all need to honour issuer-wide
791    /// rotation).
792    pub fn with_revocation_registry(revocation: Arc<RevocationRegistry>) -> Self {
793        Self {
794            tokens: DashMap::new(),
795            revocation,
796            clock_skew_secs: 0,
797            wildcard_inserted: AtomicBool::new(false),
798        }
799    }
800
801    /// Set the cache's clock-skew tolerance. Tokens cleared the
802    /// freshness checks in [`Self::check`] are admitted while
803    /// `now >= not_before - skew` AND `now < not_after + skew`.
804    /// Default is 0 (strict). The value is clamped to
805    /// [`MAX_TOKEN_CLOCK_SKEW_SECS`].
806    pub fn set_clock_skew(&mut self, skew_secs: u64) {
807        self.clock_skew_secs = skew_secs.min(MAX_TOKEN_CLOCK_SKEW_SECS);
808    }
809
    /// Current clock-skew tolerance in seconds, as stored after
    /// clamping (so never above [`MAX_TOKEN_CLOCK_SKEW_SECS`]).
    pub fn clock_skew_secs(&self) -> u64 {
        self.clock_skew_secs
    }
814
    /// Borrow the cache's revocation registry handle. Use this to
    /// drive floor bumps (`revoke_below`) without holding a separate
    /// handle; clone the returned `Arc` to share the registry with
    /// another cache via [`Self::with_revocation_registry`].
    pub fn revocation(&self) -> &Arc<RevocationRegistry> {
        &self.revocation
    }
820
821    /// Insert a token into the cache after verifying its signature.
822    ///
823    /// Returns an error if the token's signature is invalid. This prevents
824    /// self-signed or tampered tokens from being cached.
825    ///
826    /// Tokens with distinct scope bitfields for the same
827    /// `(subject, channel_hash)` are stored side-by-side.
828    /// A new token with the same scope as an existing entry
829    /// **replaces** the existing one — latest-issued wins so
830    /// refreshing via re-issue doesn't leak growth.
831    pub fn insert(&self, token: PermissionToken) -> Result<(), TokenError> {
832        token.verify()?;
833        self.insert_unchecked(token);
834        Ok(())
835    }
836
    /// Insert a token without verification (for trusted internal use).
    ///
    /// Only use this when the token is known to be valid (e.g., just issued locally).
    ///
    /// WILDCARD-scoped tokens are always stored under the dedicated
    /// wildcard slot (`channel_hash = 0`) regardless of the token's
    /// own `channel_hash` field — that slot is where `check()` looks
    /// for a cross-channel fallback. Non-wildcard tokens live in
    /// their exact `channel_hash` slot; a non-wildcard token whose
    /// `channel_hash` is 0 therefore shares that slot with the
    /// subject's wildcard tokens.
    ///
    /// Bounded by [`MAX_TOKEN_SLOTS`] and
    /// [`MAX_TOKENS_PER_SLOT`]. When the slot cap is hit, novel
    /// keys are silently dropped (existing slot keys still
    /// refresh); when the within-slot cap is hit, novel scope
    /// bitfields are silently dropped (existing-scope refresh
    /// still wins). `evict_expired` reclaims slots as tokens
    /// lapse, restoring admission.
    pub fn insert_unchecked(&self, token: PermissionToken) {
        let is_wildcard = token.scope.contains(TokenScope::WILDCARD);
        let slot_channel = if is_wildcard { 0 } else { token.channel_hash };
        let key = (*token.subject.as_bytes(), slot_channel);
        if is_wildcard {
            // Latch the wildcard-present flag so `check` knows to
            // walk the wildcard slot. Once set, never cleared:
            // a subsequent eviction just means the fallback walks
            // an empty slot, which is cheap. The latch is set before
            // the capacity checks below, so it stays set even if this
            // particular token ends up dropped or rolled back.
            self.wildcard_inserted.store(true, AtomicOrdering::Relaxed);
        }

        // Slot cap: only refuse NOVEL keys at the cap so existing
        // peers' token refreshes still work under flood pressure.
        // The cap is enforced AFTER releasing the per-shard entry
        // lock — calling `self.tokens.len()` while holding the
        // entry's write guard would deadlock on our own shard
        // (DashMap's `len` walks every shard's lock). We accept a
        // brief observable overshoot — the inserted token is valid
        // and short-lived between `insert` and `remove` — in
        // exchange for guaranteed convergence. Pre-fix, a parallel
        // `contains_key` + `len` pre-check let N callers all see
        // `len < cap` and overshoot by N, with no rollback.
        let inserted_novel_key = {
            // `entry` is the shard write guard; it is released at the
            // end of this block, before the `len()` call below.
            let mut entry = self.tokens.entry(key).or_default();
            // An empty Vec here means `or_default` just created the
            // slot, i.e. this key is new to the cache.
            let was_empty = entry.is_empty();
            // Replace any existing token with exactly the same scope;
            // otherwise push so distinct-scope tokens coexist.
            if let Some(slot) = entry.iter_mut().find(|t| t.scope == token.scope) {
                *slot = token;
            } else if entry.len() < MAX_TOKENS_PER_SLOT {
                // Room left in the slot: admit the novel scope.
                // Within-slot cap: when the slot is already at
                // capacity neither branch runs and the novel-scope
                // token is dropped. Refresh of an existing scope
                // still hits the branch above, so the drop only
                // fires on attempts to stack a new scope.
                entry.push(token);
            }
            was_empty
        };

        // Post-insert rollback: if we just admitted a fresh slot
        // key and the cache is now over the soft cap, remove the
        // slot we inserted. Concurrent racers all hit this branch
        // and converge to `len() <= MAX_TOKEN_SLOTS`.
        // NOTE(review): the removal is by key, so a token another
        // thread added to this same slot after the guard above was
        // released would be removed too — confirm that is acceptable.
        if inserted_novel_key && self.tokens.len() > MAX_TOKEN_SLOTS {
            self.tokens.remove(&key);
        }
    }
902
903    /// Check if an entity is authorized for an action on a channel.
904    ///
905    /// Returns `Ok(())` if any cached token for this subject grants
906    /// `action`, else an error. Walks the exact-channel slot first,
907    /// then the wildcard (`channel_hash = 0`) slot. Within a slot,
908    /// any valid token that authorizes the requested action wins —
909    /// an expired or otherwise-invalid token in the same slot is
910    /// ignored, not blocking.
911    pub fn check(
912        &self,
913        subject: &EntityId,
914        action: TokenScope,
915        channel_hash: ChannelHash,
916    ) -> Result<(), TokenError> {
917        // Try exact channel match first
918        if let Some(slot) = self.tokens.get(&(*subject.as_bytes(), channel_hash)) {
919            if slot.value().iter().any(|t| {
920                t.is_valid_with_skew(self.clock_skew_secs).is_ok()
921                        && !self.revocation.is_revoked(t)
922                        // Defence-in-depth: cross-check the token's
923                        // signed `subject` field matches the lookup
924                        // key. Inserts already key by
925                        // `token.subject.as_bytes()`, so this is
926                        // strictly redundant today — but a future
927                        // refactor that ever inserts under a derived
928                        // or aliased key would silently authorize the
929                        // wrong entity here without this check.
930                        && t.subject.as_bytes() == subject.as_bytes()
931                        && t.authorizes(action, channel_hash)
932            }) {
933                return Ok(());
934            }
935        }
936        // Wildcard fast path: skip the second DashMap probe + iter
937        // when no wildcard token has ever been inserted in this
938        // cache. The common case (subject has only channel-bound
939        // tokens) returns NotAuthorized without ever touching the
940        // wildcard slot.
941        if !self.wildcard_inserted.load(AtomicOrdering::Relaxed) {
942            return Err(TokenError::NotAuthorized);
943        }
944        // Try wildcard (channel_hash = 0)
945        if let Some(slot) = self.tokens.get(&(*subject.as_bytes(), 0)) {
946            if slot.value().iter().any(|t| {
947                t.is_valid_with_skew(self.clock_skew_secs).is_ok()
948                    && !self.revocation.is_revoked(t)
949                    && t.subject.as_bytes() == subject.as_bytes()
950                    && t.authorizes(action, channel_hash)
951            }) {
952                return Ok(());
953            }
954        }
955        Err(TokenError::NotAuthorized)
956    }
957
958    /// Fetch any cached token for `(subject, channel_hash)`. Exact
959    /// match only — the wildcard (`channel_hash = 0`) entry is a
960    /// separate key. Returns the first valid token in the slot; if
961    /// none are valid, returns any entry (so callers can still
962    /// inspect for debugging). Callers that need a specific scope
963    /// should use [`Self::check`] instead.
964    pub fn get(&self, subject: &EntityId, channel_hash: ChannelHash) -> Option<PermissionToken> {
965        let slot = self.tokens.get(&(*subject.as_bytes(), channel_hash))?;
966        let tokens = slot.value();
967        // Prefer a currently-valid token; otherwise fall back to
968        // the first entry so callers like `net_identity_lookup_token`
969        // can still inspect it.
970        tokens
971            .iter()
972            .find(|t| t.is_valid().is_ok())
973            .or_else(|| tokens.first())
974            .cloned()
975    }
976
977    /// Remove expired tokens.
978    pub fn evict_expired(&self) {
979        let now = current_timestamp();
980        self.tokens.retain(|_, slot| {
981            slot.retain(|t| t.not_after > now);
982            !slot.is_empty()
983        });
984    }
985
986    /// Total number of cached tokens across all slots.
987    ///
988    /// A slot is keyed by `(subject, channel_hash)` and can hold
989    /// multiple tokens with distinct scopes (e.g. one `PUBLISH` and
990    /// one `SUBSCRIBE` for the same peer-on-channel). An earlier
991    /// storage change from a single `PermissionToken` per slot to
992    /// a `Vec<PermissionToken>` left this method returning the
993    /// slot count instead of the token count — FFI / binding
994    /// metrics that surfaced "tokens cached" silently undercounted
995    /// whenever a slot carried more than one scope. Sum the slot
996    /// lengths so the number matches the observable cache
997    /// contents.
998    pub fn len(&self) -> usize {
999        self.tokens.iter().map(|e| e.value().len()).sum()
1000    }
1001
    /// Check if the cache holds no tokens.
    ///
    /// Implemented as "no slots": `evict_expired` already drops empty
    /// slots, and `insert_unchecked` never leaves one behind, so a
    /// zero slot-count and a zero token-count coincide in practice —
    /// and asking the map for its slot count avoids walking every
    /// slot's token list the way [`Self::len`] does.
    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }
1012}
1013
impl Default for TokenCache {
    /// Same as [`TokenCache::new`]: empty cache, fresh revocation
    /// registry, zero clock-skew tolerance.
    fn default() -> Self {
        Self::new()
    }
}
1019
1020impl std::fmt::Debug for TokenCache {
1021    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1022        f.debug_struct("TokenCache")
1023            .field("count", &self.tokens.len())
1024            .finish()
1025    }
1026}
1027
/// Errors from token operations (issuing, verifying, delegating and
/// cache authorization checks).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenError {
    /// Token signature is invalid.
    InvalidSignature,
    /// Token is not yet valid (before `not_before`).
    NotYetValid,
    /// Token has expired (at or after `not_after` — expiry is
    /// inclusive of the boundary second).
    Expired,
    /// Delegation depth exhausted.
    DelegationExhausted,
    /// DELEGATE scope not present in token.
    DelegationNotAllowed,
    /// No valid token found for the requested action.
    NotAuthorized,
    /// Wire format is too short or malformed.
    InvalidFormat,
    /// Issuer/signer keypair is public-only (post-migration zeroize
    /// or other read-only construction). The caller's signing
    /// operation is not possible.
    ReadOnly,
    /// `duration_secs == 0` was passed to [`PermissionToken::try_issue`].
    ///
    /// Pre-fix, a TTL of 0 produced a token with
    /// `not_after == not_before`, which every receiver immediately
    /// rejects as `Expired`. The signature verifies but every
    /// authorization check fails — silently. Reject at issue
    /// time so the caller learns about the misuse instead of
    /// minting an unusable token.
    ZeroTtl,
    /// `duration_secs` exceeded [`MAX_TOKEN_TTL_SECS`].
    ///
    /// An unbounded TTL saturates `not_after` into a token that never
    /// expires and can only be retired through the advisory revocation
    /// floor. Rejected at issue time so a leaked credential always has
    /// a bounded lifetime.
    TtlTooLong,
}
1066
1067impl std::fmt::Display for TokenError {
1068    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1069        match self {
1070            Self::InvalidSignature => write!(f, "invalid token signature"),
1071            Self::NotYetValid => write!(f, "token not yet valid"),
1072            Self::Expired => write!(f, "token expired"),
1073            Self::DelegationExhausted => write!(f, "delegation depth exhausted"),
1074            Self::DelegationNotAllowed => write!(f, "delegation not allowed by scope"),
1075            Self::NotAuthorized => write!(f, "not authorized"),
1076            Self::InvalidFormat => write!(f, "invalid token format"),
1077            Self::ReadOnly => write!(f, "signer keypair is public-only"),
1078            Self::ZeroTtl => write!(f, "token TTL must be > 0 seconds"),
1079            Self::TtlTooLong => write!(
1080                f,
1081                "token TTL exceeds the maximum of {MAX_TOKEN_TTL_SECS} seconds"
1082            ),
1083        }
1084    }
1085}
1086
// Marker impl: `TokenError` uses the trait's default methods (no
// `source()` override), so it works with `?` and `Box<dyn Error>`.
impl std::error::Error for TokenError {}
1088
/// Wall-clock time as whole seconds since the Unix epoch.
///
/// If the system clock reads earlier than the epoch, the lookup
/// error is swallowed and 0 is returned rather than panicking.
fn current_timestamp() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |since_epoch| since_epoch.as_secs())
}
1096
1097#[cfg(test)]
1098mod tests {
1099    use super::*;
1100
    /// Happy path: a freshly issued token passes both the signature
    /// check (`verify`) and the full validity check (`is_valid`).
    #[test]
    fn test_issue_and_verify() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();

        let token = PermissionToken::issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH
                .union(TokenScope::SUBSCRIBE)
                .union(TokenScope::WILDCARD),
            0, // channel_hash ignored for WILDCARD tokens
            3600, // TTL in seconds
            0,    // delegation depth
        );

        assert!(token.verify().is_ok());
        assert!(token.is_valid().is_ok());
    }
1120
    /// A TTL of 0 must surface as `TokenError::ZeroTtl` rather than
    /// silently minting a born-expired token. Pre-fix, the caller got
    /// a token whose signature verified but which failed every
    /// authorization check at the receiver — with no diagnostic for
    /// the issuer.
    #[test]
    fn try_issue_rejects_zero_ttl() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();

        let err = PermissionToken::try_issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0, // channel_hash
            0, // ttl = 0 seconds — the bug
            0, // delegation depth
        )
        .unwrap_err();
        assert_eq!(err, TokenError::ZeroTtl, "expected ZeroTtl, got {:?}", err);
    }
1142
    /// `issue` is the panicking convenience wrapper around
    /// `try_issue`. When `try_issue` rejects for a *non*-`ReadOnly`
    /// reason (today: `ZeroTtl`), the panic message must name the
    /// real cause — not the canned "called with a public-only
    /// keypair" string. Pre-fix the wrapper did
    /// `.expect("...public-only keypair...")` unconditionally, so a
    /// `duration_secs == 0` panic mis-blamed key loading and sent
    /// whoever saw the panic chasing the wrong bug.
    #[test]
    #[should_panic(expected = "duration_secs == 0")]
    fn issue_zero_ttl_panic_message_blames_ttl_not_keypair() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();
        // The result is discarded: the call itself is expected to panic.
        let _ = PermissionToken::issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0,
            0, // the bug — must panic with a TTL-flavored message
            0,
        );
    }
1165
    /// Companion to the zero-TTL panic test: when the *real* cause is
    /// a public-only keypair, the wrapper still panics with the
    /// long-standing `ReadOnly`-flavored message, so existing
    /// callsites that grep on it keep working.
    #[test]
    #[should_panic(expected = "public-only keypair")]
    fn issue_public_only_keypair_panic_message_blames_keypair() {
        let full = EntityKeypair::generate();
        // Keep only the public half: this issuer cannot sign.
        let issuer = EntityKeypair::public_only(full.entity_id().clone());
        let subject = EntityKeypair::generate();
        let _ = PermissionToken::issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0,
            3600, // valid TTL, so the keypair is the only possible failure
            0,
        );
    }
1185
    /// Boundary companion to the zero-TTL rejection: a TTL of 1
    /// second is the lowest accepted value and must mint a token that
    /// is `is_valid()` immediately.
    #[test]
    fn try_issue_accepts_one_second_ttl() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();

        let token = PermissionToken::try_issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0,
            1, // 1 second — minimum valid
            0,
        )
        .expect("ttl=1 must mint cleanly (boundary)");
        assert!(token.is_valid().is_ok());
    }
1204
    /// Changing a signed field after issuance (here: escalating the
    /// scope to ADMIN) without re-signing must make `verify` fail.
    #[test]
    fn test_tampered_token() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();

        let mut token = PermissionToken::issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0,
            3600,
            0,
        );

        // Tamper with scope; the signature still covers the old value.
        token.scope = TokenScope::ADMIN;
        assert!(token.verify().is_err());
    }
1223
    /// A correctly signed token whose `not_after` lies in the past
    /// must be reported as expired by both `is_expired` and
    /// `is_valid`, even though its signature verifies.
    #[test]
    fn test_expired_token() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();

        // Mint with the minimum valid TTL (1 second), then
        // backdate `not_after` to the past and re-sign — this is
        // how we test expiry semantics now that try_issue rejects
        // `duration_secs == 0` outright.
        let mut token = PermissionToken::issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0,
            1,
            0,
        );
        token.not_after = 0;
        // Re-sign so the backdated field is covered by a valid
        // signature and only the time check can fail.
        let payload = token.signed_payload();
        token.signature = issuer.sign(&payload).to_bytes();

        assert!(token.verify().is_ok(), "signature is valid");
        // With `not_after = 0`, the inclusive-expiry convention
        // (`now >= not_after`) says "expired" — `is_expired` and
        // `is_valid` must both report it.
        assert!(
            token.is_expired(),
            "backdated token must report expired under inclusive-expiry",
        );
        assert!(
            matches!(token.is_valid(), Err(TokenError::Expired)),
            "is_valid must agree with is_expired at the boundary",
        );
    }
1258
    /// Regression for a cubic-flagged P3: `is_valid` / `is_expired`
    /// used strict `>` against `not_after`, so the boundary second
    /// (`now == not_after`) still counted as valid. The cache's
    /// `evict_expired` has always used the inclusive convention
    /// (drops at the boundary), so tokens survived one second longer
    /// in the "hot" caller-facing checks than in the sweep — a
    /// quiet mismatch that also gave every `issue(duration=N)` an
    /// effective lifetime of `N+1` seconds.
    #[test]
    fn is_valid_and_is_expired_agree_at_not_after_boundary() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();

        let mut token = PermissionToken::issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0,
            3600,
            0,
        );
        // Force the boundary deterministically — set not_after to
        // the current wall-clock second and re-sign so `is_valid`
        // still passes its signature check. Any later clock reading
        // is >= this value, so the token is at or past the boundary
        // for every assertion below.
        token.not_after = current_timestamp();
        let payload = token.signed_payload();
        token.signature = issuer.sign(&payload).to_bytes();

        assert!(
            token.is_expired(),
            "is_expired must return true at now == not_after (inclusive)",
        );
        assert!(
            matches!(token.is_valid(), Err(TokenError::Expired)),
            "is_valid must agree: Expired at now == not_after (strict)",
        );

        // And the cache eviction path must also drop it — same
        // boundary convention. `TokenCache::new()` uses zero clock
        // skew, so no tolerance is in play here.
        let cache = TokenCache::new();
        cache.insert_unchecked(token);
        cache.evict_expired();
        assert_eq!(
            cache.len(),
            0,
            "evict_expired must drop a boundary token — all three code paths \
             (is_valid, is_expired, evict_expired) must agree on the boundary",
        );
    }
1309
    /// Regression for a cubic-flagged bug that hit every FFI binding:
    /// `token_is_expired` used to call `is_valid()` and match on
    /// `Err(Expired)`, which short-circuited on signature failure.
    /// A tampered + expired token therefore returned `false` ("not
    /// expired") even though the wall-clock was past `not_after`.
    /// `is_expired()` must be a pure time check, independent of the
    /// signature.
    #[test]
    fn is_expired_ignores_signature_tampering() {
        let issuer = EntityKeypair::generate();
        let subject = EntityKeypair::generate();

        // Fresh token — not expired.
        let mut token = PermissionToken::issue(
            &issuer,
            subject.entity_id().clone(),
            TokenScope::PUBLISH,
            0,
            3600,
            0,
        );
        assert!(!token.is_expired(), "fresh token is not expired");

        // Construct the bug scenario: backdate `not_after` into the
        // past AND flip a byte in the signature. In practice a
        // tampered packet would arrive over the wire; here we
        // mutate in place so the test doesn't depend on sleeps.
        // Either mutation alone already breaks `verify()` —
        // `not_after` is part of the signed payload and is changed
        // here without re-signing, and the signature bytes themselves
        // are corrupted — the point is that `is_expired()` doesn't
        // care.
        token.not_after = 0;
        token.signature[0] ^= 0xFF;

        // Signature fails (expected).
        assert!(
            token.verify().is_err(),
            "mutated payload / signature must fail verify",
        );

        // Pre-fix pattern: `matches!(is_valid(), Err(Expired))`.
        // `is_valid()` short-circuits on the signature failure and
        // returns `Err(InvalidSignature)`, so the match returns
        // false — this is exactly the bug Cubic flagged.
        assert!(
            !matches!(token.is_valid(), Err(TokenError::Expired)),
            "captures the pre-fix pattern: is_valid() short-circuits \
             on signature, never reaches the time check",
        );

        // Post-fix: `is_expired()` compares time directly and
        // reports `true` regardless of signature state.
        assert!(
            token.is_expired(),
            "is_expired() must be a pure time check, independent \
             of signature validity",
        );
    }
1368
1369    #[test]
1370    fn test_channel_filter() {
1371        let issuer = EntityKeypair::generate();
1372        let subject = EntityKeypair::generate();
1373
1374        let token = PermissionToken::issue(
1375            &issuer,
1376            subject.entity_id().clone(),
1377            TokenScope::PUBLISH,
1378            0xABCD, // specific channel
1379            3600,
1380            0,
1381        );
1382
1383        assert!(token.authorizes(TokenScope::PUBLISH, 0xABCD));
1384        assert!(!token.authorizes(TokenScope::PUBLISH, 0x1234)); // wrong channel
1385        assert!(!token.authorizes(TokenScope::SUBSCRIBE, 0xABCD)); // wrong action
1386    }
1387
1388    #[test]
1389    fn test_wildcard_channel() {
1390        let issuer = EntityKeypair::generate();
1391        let subject = EntityKeypair::generate();
1392
1393        // Wildcard tokens must explicitly opt in via the WILDCARD
1394        // scope bit — the old "channel_hash == 0 implies wildcard"
1395        // overload no longer applies.
1396        let token = PermissionToken::issue(
1397            &issuer,
1398            subject.entity_id().clone(),
1399            TokenScope::PUBLISH.union(TokenScope::WILDCARD),
1400            0,
1401            3600,
1402            0,
1403        );
1404
1405        assert!(token.authorizes(TokenScope::PUBLISH, 0xABCD));
1406        assert!(token.authorizes(TokenScope::PUBLISH, 0x1234));
1407        assert!(token.authorizes(TokenScope::PUBLISH, 0));
1408    }
1409
1410    #[test]
1411    fn test_regression_channel_hash_zero_is_not_wildcard() {
1412        // Regression (MEDIUM, BUGS.md): a token with `channel_hash = 0`
1413        // but no WILDCARD scope bit must NOT authorize arbitrary
1414        // channels. A legitimate channel whose canonical xxh3-derived
1415        // `ChannelHash` happens to hash to 0 would otherwise turn a
1416        // narrowly-scoped token into a universal grant — and since
1417        // xxh3 is non-cryptographic, an attacker able to register
1418        // names could brute-force such a collision (cheap at the
1419        // wire u16, but reachable at the canonical u64 too with
1420        // enough names).
1421        let issuer = EntityKeypair::generate();
1422        let subject = EntityKeypair::generate();
1423
1424        let token = PermissionToken::issue(
1425            &issuer,
1426            subject.entity_id().clone(),
1427            TokenScope::PUBLISH, // no WILDCARD
1428            0,                   // channel_hash 0 — pretend some channel hashed here
1429            3600,
1430            0,
1431        );
1432
1433        // Token authorizes channel 0 only (exact match), not other channels.
1434        assert!(token.authorizes(TokenScope::PUBLISH, 0));
1435        assert!(
1436            !token.authorizes(TokenScope::PUBLISH, 0xABCD),
1437            "channel_hash=0 without WILDCARD must not grant access to arbitrary channels"
1438        );
1439        assert!(
1440            !token.authorizes(TokenScope::PUBLISH, 0x1234),
1441            "channel_hash=0 without WILDCARD must not grant access to arbitrary channels"
1442        );
1443    }
1444
1445    #[test]
1446    fn test_delegation() {
1447        let root = EntityKeypair::generate();
1448        let node_a = EntityKeypair::generate();
1449        let node_b = EntityKeypair::generate();
1450
1451        // Root issues to A with delegation depth 2
1452        let token_a = PermissionToken::issue(
1453            &root,
1454            node_a.entity_id().clone(),
1455            TokenScope::ALL,
1456            0,
1457            3600,
1458            2,
1459        );
1460        assert!(token_a.is_valid().is_ok());
1461
1462        // A delegates to B with restricted scope
1463        let token_b = token_a
1464            .delegate(
1465                &node_a,
1466                node_b.entity_id().clone(),
1467                TokenScope::PUBLISH.union(TokenScope::DELEGATE),
1468            )
1469            .unwrap();
1470
1471        assert!(token_b.is_valid().is_ok());
1472        assert_eq!(token_b.delegation_depth, 1);
1473        assert!(token_b.authorizes(TokenScope::PUBLISH, 0));
1474        assert!(!token_b.authorizes(TokenScope::ADMIN, 0)); // restricted away
1475    }
1476
1477    #[test]
1478    fn test_delegation_depth_exhausted() {
1479        let root = EntityKeypair::generate();
1480        let node_a = EntityKeypair::generate();
1481        let node_b = EntityKeypair::generate();
1482
1483        let token = PermissionToken::issue(
1484            &root,
1485            node_a.entity_id().clone(),
1486            TokenScope::ALL,
1487            0,
1488            3600,
1489            0, // no delegation
1490        );
1491
1492        let result = token.delegate(&node_a, node_b.entity_id().clone(), TokenScope::PUBLISH);
1493        assert_eq!(result.unwrap_err(), TokenError::DelegationExhausted);
1494    }
1495
1496    #[test]
1497    fn test_delegation_wrong_signer() {
1498        let root = EntityKeypair::generate();
1499        let node_a = EntityKeypair::generate();
1500        let node_b = EntityKeypair::generate();
1501        let imposter = EntityKeypair::generate();
1502
1503        let token = PermissionToken::issue(
1504            &root,
1505            node_a.entity_id().clone(),
1506            TokenScope::ALL,
1507            0,
1508            3600,
1509            1,
1510        );
1511
1512        // Imposter tries to delegate A's token
1513        let result = token.delegate(&imposter, node_b.entity_id().clone(), TokenScope::PUBLISH);
1514        assert_eq!(result.unwrap_err(), TokenError::NotAuthorized);
1515    }
1516
1517    #[test]
1518    fn test_serialization_roundtrip() {
1519        let issuer = EntityKeypair::generate();
1520        let subject = EntityKeypair::generate();
1521
1522        let token = PermissionToken::issue(
1523            &issuer,
1524            subject.entity_id().clone(),
1525            TokenScope::PUBLISH.union(TokenScope::SUBSCRIBE),
1526            0xBEEF,
1527            3600,
1528            3,
1529        );
1530
1531        let bytes = token.to_bytes();
1532        assert_eq!(bytes.len(), PermissionToken::WIRE_SIZE);
1533
1534        let parsed = PermissionToken::from_bytes(&bytes).unwrap();
1535        assert!(parsed.verify().is_ok());
1536        assert_eq!(parsed.issuer, token.issuer);
1537        assert_eq!(parsed.subject, token.subject);
1538        assert_eq!(parsed.scope.bits(), token.scope.bits());
1539        assert_eq!(parsed.channel_hash, 0xBEEF);
1540        assert_eq!(parsed.delegation_depth, 3);
1541        assert_eq!(parsed.nonce, token.nonce);
1542    }
1543
1544    /// `TokenScope::contains(NONE)` must return `false` — the bit
1545    /// identity `(bits & 0) == 0` is unconditionally true, so any
1546    /// token would otherwise "contain" the no-op action. A caller
1547    /// that builds `action: TokenScope` from external input (e.g. a
1548    /// wire `u32` masked into a smaller subset) would then receive
1549    /// a blanket `true` against any token whenever the masked input
1550    /// happened to land on `NONE`.
1551    #[test]
1552    fn token_scope_does_not_contain_none() {
1553        // Any defined scope must NOT contain NONE.
1554        for s in [
1555            TokenScope::PUBLISH,
1556            TokenScope::SUBSCRIBE,
1557            TokenScope::ADMIN,
1558            TokenScope::DELEGATE,
1559            TokenScope::WILDCARD,
1560            TokenScope::ALL,
1561            TokenScope::PUBLISH.union(TokenScope::SUBSCRIBE),
1562        ] {
1563            assert!(
1564                !s.contains(TokenScope::NONE),
1565                "scope {:?} must not contain NONE",
1566                s.bits(),
1567            );
1568        }
1569        // Even NONE itself does not "contain" NONE — the question is
1570        // "do you authorize this action," and the no-op action is
1571        // never authorized.
1572        assert!(
1573            !TokenScope::NONE.contains(TokenScope::NONE),
1574            "NONE.contains(NONE) must be false (no token authorizes the no-op action)",
1575        );
1576
1577        // Sanity: contains is still correct for non-NONE arguments.
1578        assert!(TokenScope::ALL.contains(TokenScope::PUBLISH));
1579        assert!(!TokenScope::PUBLISH.contains(TokenScope::ADMIN));
1580        assert!(TokenScope::PUBLISH
1581            .union(TokenScope::SUBSCRIBE)
1582            .contains(TokenScope::SUBSCRIBE));
1583    }
1584
1585    #[test]
1586    fn test_token_cache() {
1587        let issuer = EntityKeypair::generate();
1588        let subject = EntityKeypair::generate();
1589
1590        let cache = TokenCache::new();
1591
1592        let token = PermissionToken::issue(
1593            &issuer,
1594            subject.entity_id().clone(),
1595            TokenScope::PUBLISH,
1596            0xABCD,
1597            3600,
1598            0,
1599        );
1600        let _ = cache.insert(token);
1601
1602        assert_eq!(cache.len(), 1);
1603
1604        // Should find the token
1605        assert!(cache
1606            .check(subject.entity_id(), TokenScope::PUBLISH, 0xABCD)
1607            .is_ok());
1608
1609        // Wrong channel
1610        assert!(cache
1611            .check(subject.entity_id(), TokenScope::PUBLISH, 0x1234)
1612            .is_err());
1613
1614        // Wrong action
1615        assert!(cache
1616            .check(subject.entity_id(), TokenScope::ADMIN, 0xABCD)
1617            .is_err());
1618
1619        // Unknown entity
1620        let unknown = EntityKeypair::generate();
1621        assert!(cache
1622            .check(unknown.entity_id(), TokenScope::PUBLISH, 0xABCD)
1623            .is_err());
1624    }
1625
1626    /// Bumping an issuer's revocation floor invalidates every
1627    /// outstanding token from that issuer (including delegated
1628    /// children, which inherit `issuer_generation` from their
1629    /// parent). Pre-fix there was no revocation at all — a leaked
1630    /// parent token's children outlived any "rotate" intent on the
1631    /// parent's key.
1632    #[test]
1633    fn revocation_floor_bump_invalidates_outstanding_tokens() {
1634        let issuer = EntityKeypair::generate();
1635        let subject = EntityKeypair::generate();
1636
1637        let cache = TokenCache::new();
1638        let token = PermissionToken::issue(
1639            &issuer,
1640            subject.entity_id().clone(),
1641            TokenScope::PUBLISH,
1642            0xABCD_EF00_AAAA_BBBB,
1643            3600,
1644            0,
1645        );
1646        // Tokens issued via try_issue default to generation 0.
1647        assert_eq!(token.issuer_generation, 0);
1648        cache.insert(token).expect("token should verify");
1649
1650        // Pre-revoke: check passes.
1651        assert!(cache
1652            .check(
1653                subject.entity_id(),
1654                TokenScope::PUBLISH,
1655                0xABCD_EF00_AAAA_BBBB,
1656            )
1657            .is_ok());
1658
1659        // Bump the floor to 1 — every outstanding gen-0 token is now
1660        // below the floor.
1661        cache.revocation().revoke_below(issuer.entity_id(), 1);
1662
1663        // Same check now fails.
1664        assert!(cache
1665            .check(
1666                subject.entity_id(),
1667                TokenScope::PUBLISH,
1668                0xABCD_EF00_AAAA_BBBB,
1669            )
1670            .is_err());
1671    }
1672
1673    /// `is_valid_with_skew` widens both bounds by the supplied
1674    /// tolerance. Pinned at the boundary: half-a-window in admits;
1675    /// just past the window rejects. Both the past (Expired) and
1676    /// future (NotYetValid) sides are exercised — the skew applies
1677    /// symmetrically.
1678    #[test]
1679    fn is_valid_with_skew_accepts_inside_window_rejects_outside() {
1680        let issuer = EntityKeypair::generate();
1681        let subject = EntityKeypair::generate();
1682        let skew: u64 = 60;
1683
1684        let mut token = PermissionToken::issue(
1685            &issuer,
1686            subject.entity_id().clone(),
1687            TokenScope::PUBLISH,
1688            0,
1689            3600,
1690            0,
1691        );
1692
1693        // Token expired half-a-skew-window ago: still valid under
1694        // the configured skew, but strict `is_valid` rejects.
1695        token.not_after = current_timestamp() - skew / 2;
1696        let payload = token.signed_payload();
1697        token.signature = issuer.sign(&payload).to_bytes();
1698        assert!(
1699            token.is_valid_with_skew(skew).is_ok(),
1700            "is_valid_with_skew must accept tokens inside the past-skew window",
1701        );
1702        assert!(
1703            matches!(token.is_valid(), Err(TokenError::Expired)),
1704            "strict is_valid must reject the same token",
1705        );
1706
1707        // Token expired past the window: must reject under both.
1708        token.not_after = current_timestamp() - skew - 5;
1709        let payload = token.signed_payload();
1710        token.signature = issuer.sign(&payload).to_bytes();
1711        assert!(
1712            matches!(token.is_valid_with_skew(skew), Err(TokenError::Expired)),
1713            "is_valid_with_skew must reject tokens past the past-skew window",
1714        );
1715
1716        // Token not-yet-valid by half a window: accept under skew,
1717        // reject under strict.
1718        token.not_after = current_timestamp() + 3600;
1719        token.not_before = current_timestamp() + skew / 2;
1720        let payload = token.signed_payload();
1721        token.signature = issuer.sign(&payload).to_bytes();
1722        assert!(
1723            token.is_valid_with_skew(skew).is_ok(),
1724            "is_valid_with_skew must accept tokens inside the future-skew window",
1725        );
1726        assert!(
1727            matches!(token.is_valid(), Err(TokenError::NotYetValid)),
1728            "strict is_valid must reject the same token",
1729        );
1730
1731        // Token not-yet-valid past the window: must reject under both.
1732        token.not_before = current_timestamp() + skew + 5;
1733        let payload = token.signed_payload();
1734        token.signature = issuer.sign(&payload).to_bytes();
1735        assert!(
1736            matches!(token.is_valid_with_skew(skew), Err(TokenError::NotYetValid),),
1737            "is_valid_with_skew must reject tokens past the future-skew window",
1738        );
1739    }
1740
1741    /// Caches that never receive a WILDCARD token skip the
1742    /// wildcard-slot fallback on `check` miss. The fast path is
1743    /// observable via the public `len` / `check` API: insert a
1744    /// channel-bound token, query for the wrong channel, and
1745    /// confirm both the answer and (indirectly) that the
1746    /// wildcard-slot probe was elided.
1747    #[test]
1748    fn check_skips_wildcard_slot_when_no_wildcard_ever_inserted() {
1749        let issuer = EntityKeypair::generate();
1750        let subject = EntityKeypair::generate();
1751        let cache = TokenCache::new();
1752
1753        let token = PermissionToken::issue(
1754            &issuer,
1755            subject.entity_id().clone(),
1756            TokenScope::PUBLISH,
1757            0xAAAA,
1758            3600,
1759            0,
1760        );
1761        cache.insert(token).unwrap();
1762
1763        // Wrong channel: must NotAuthorize. Pre-fix this took the
1764        // wildcard fallback path; post-fix it returns immediately
1765        // after the exact-slot miss because no wildcard was ever
1766        // inserted. Behaviour is identical (the slow path would
1767        // also have miss-then-deny), but cost is one DashMap probe
1768        // + iter cheaper.
1769        assert!(cache
1770            .check(subject.entity_id(), TokenScope::PUBLISH, 0xBBBB)
1771            .is_err());
1772    }
1773
1774    /// Once a wildcard token has been inserted, `check` always
1775    /// walks the wildcard slot — the fast-path flag is set on
1776    /// insert and never cleared, so a later eviction doesn't
1777    /// disable the wildcard scan.
1778    #[test]
1779    fn check_walks_wildcard_slot_after_any_wildcard_insert() {
1780        let issuer = EntityKeypair::generate();
1781        let subject = EntityKeypair::generate();
1782        let cache = TokenCache::new();
1783
1784        let wildcard = PermissionToken::issue(
1785            &issuer,
1786            subject.entity_id().clone(),
1787            TokenScope::PUBLISH.union(TokenScope::WILDCARD),
1788            0,
1789            3600,
1790            0,
1791        );
1792        cache.insert(wildcard).unwrap();
1793
1794        // The wildcard token authorizes every channel, so a check
1795        // for any channel succeeds.
1796        assert!(cache
1797            .check(subject.entity_id(), TokenScope::PUBLISH, 0xDEAD)
1798            .is_ok());
1799        assert!(cache
1800            .check(subject.entity_id(), TokenScope::PUBLISH, 0xBEEF)
1801            .is_ok());
1802    }
1803
1804    /// TokenCache::with_clock_skew applies the configured tolerance
1805    /// to every `check` call. A token whose `not_after` is just
1806    /// past now is rejected by a strict cache but admitted by a
1807    /// cache constructed with sufficient skew.
1808    #[test]
1809    fn token_cache_with_clock_skew_admits_inside_window() {
1810        let issuer = EntityKeypair::generate();
1811        let subject = EntityKeypair::generate();
1812        let channel: ChannelHash = 0x1234_5678_9ABC_DEF0;
1813
1814        let mut token = PermissionToken::issue(
1815            &issuer,
1816            subject.entity_id().clone(),
1817            TokenScope::PUBLISH,
1818            channel,
1819            3600,
1820            0,
1821        );
1822        // Expired 5 s ago.
1823        token.not_after = current_timestamp() - 5;
1824        let payload = token.signed_payload();
1825        token.signature = issuer.sign(&payload).to_bytes();
1826
1827        // Strict cache rejects.
1828        let strict = TokenCache::new();
1829        strict.insert_unchecked(token.clone());
1830        assert!(strict
1831            .check(subject.entity_id(), TokenScope::PUBLISH, channel)
1832            .is_err());
1833
1834        // Lenient cache admits.
1835        let lenient = TokenCache::with_clock_skew(60);
1836        lenient.insert_unchecked(token);
1837        assert!(lenient
1838            .check(subject.entity_id(), TokenScope::PUBLISH, channel)
1839            .is_ok());
1840    }
1841
1842    /// Defence-in-depth: `TokenCache::check` cross-checks the
1843    /// token's signed `subject` field against the lookup key. The
1844    /// invariant holds today because inserts key by
1845    /// `token.subject.as_bytes()`, but a future refactor that ever
1846    /// keys by a derived value would otherwise silently authorize
1847    /// the wrong entity. The check fires here by directly inserting
1848    /// a token under a foreign subject — `insert_unchecked` is
1849    /// deliberately deliberate enough to bypass the normal keying
1850    /// invariant.
1851    #[test]
1852    fn check_rejects_token_keyed_under_mismatched_subject() {
1853        let issuer = EntityKeypair::generate();
1854        let real_subject = EntityKeypair::generate();
1855        let foreign_subject = EntityKeypair::generate();
1856        let channel: ChannelHash = 0x1234_5678_9ABC_DEF0;
1857
1858        let token = PermissionToken::issue(
1859            &issuer,
1860            real_subject.entity_id().clone(),
1861            TokenScope::PUBLISH,
1862            channel,
1863            3600,
1864            0,
1865        );
1866
1867        // Manually inject the token into the slot keyed by the
1868        // foreign subject's bytes. This is what a buggy refactor of
1869        // the keying scheme would produce.
1870        let cache = TokenCache::new();
1871        cache
1872            .tokens
1873            .entry((*foreign_subject.entity_id().as_bytes(), channel))
1874            .or_default()
1875            .push(token);
1876
1877        // The cache slot exists for foreign_subject, but the inner
1878        // token's signed `subject` is real_subject. Check must
1879        // refuse — pre-fix the predicate matched any token in the
1880        // slot regardless of the inner field.
1881        assert!(cache
1882            .check(foreign_subject.entity_id(), TokenScope::PUBLISH, channel)
1883            .is_err());
1884    }
1885
1886    /// Revocation floor is monotonic: bumping with a lower value is
1887    /// a no-op. Prevents accidental un-revocation under racing
1888    /// rotation attempts.
1889    #[test]
1890    fn revocation_floor_is_monotonic() {
1891        let issuer = EntityKeypair::generate();
1892        let registry = RevocationRegistry::new();
1893        registry.revoke_below(issuer.entity_id(), 5);
1894        assert_eq!(registry.floor(issuer.entity_id()), 5);
1895        // Lower value: no-op.
1896        registry.revoke_below(issuer.entity_id(), 2);
1897        assert_eq!(registry.floor(issuer.entity_id()), 5);
1898        // Higher value: advances.
1899        registry.revoke_below(issuer.entity_id(), 10);
1900        assert_eq!(registry.floor(issuer.entity_id()), 10);
1901    }
1902
1903    /// A delegated child must inherit its parent's
1904    /// `issuer_generation` so a floor bump on the issuer's key
1905    /// invalidates the child transitively without a chain walk.
1906    #[test]
1907    fn delegate_inherits_parent_issuer_generation() {
1908        let issuer = EntityKeypair::generate();
1909        let intermediate = EntityKeypair::generate();
1910        let leaf = EntityKeypair::generate();
1911
1912        let mut parent = PermissionToken::issue(
1913            &issuer,
1914            intermediate.entity_id().clone(),
1915            TokenScope::PUBLISH.union(TokenScope::DELEGATE),
1916            0xCAFE_BABE,
1917            3600,
1918            2,
1919        );
1920        // Simulate a parent issued at generation 7.
1921        parent.issuer_generation = 7;
1922        // Re-sign so the modified payload still verifies — bypass
1923        // the public `delegate` because it's the issuer's keypair
1924        // that signs the parent.
1925        let payload = parent.signed_payload();
1926        parent.signature = issuer.sign(&payload).to_bytes();
1927
1928        let child = parent
1929            .delegate(&intermediate, leaf.entity_id().clone(), TokenScope::PUBLISH)
1930            .expect("delegate should succeed");
1931        assert_eq!(
1932            child.issuer_generation, 7,
1933            "child must inherit parent's issuer_generation"
1934        );
1935    }
1936
1937    /// A token bound to channel hash `H_a` (u64) must NOT authorize a
1938    /// channel whose hash `H_b` collides with `H_a` only in the low
1939    /// 32 bits. Pre-widening, ChannelHash was xxh3_64(name) as u32, so
1940    /// any two names sharing the low 32 bits of the xxh3_64 digest
1941    /// hashed equal. Now the cache and `authorizes` consume the full
1942    /// u64, so two hashes that differ in the high 32 bits cannot
1943    /// authorize each other — closing the targeted-collision attack
1944    /// on the token fast path.
1945    #[test]
1946    fn token_cache_check_distinguishes_u32_aliased_u64_hashes() {
1947        let issuer = EntityKeypair::generate();
1948        let subject = EntityKeypair::generate();
1949
1950        let cache = TokenCache::new();
1951        // Two hashes that share the low 32 bits but differ in the high
1952        // 32 bits — exactly the shape a 2^32 grinding attack would have
1953        // produced under the old `as u32` cast.
1954        let h_a: ChannelHash = 0x0000_0001_DEAD_BEEF;
1955        let h_b: ChannelHash = 0xDEAD_BEEF_DEAD_BEEF;
1956        assert_ne!(h_a, h_b);
1957        assert_eq!(h_a as u32, h_b as u32, "test setup: low 32 must alias");
1958
1959        let token = PermissionToken::issue(
1960            &issuer,
1961            subject.entity_id().clone(),
1962            TokenScope::PUBLISH,
1963            h_a,
1964            3600,
1965            0,
1966        );
1967        cache.insert(token).expect("token should verify");
1968
1969        // The legitimate channel admits the token.
1970        assert!(cache
1971            .check(subject.entity_id(), TokenScope::PUBLISH, h_a)
1972            .is_ok());
1973        // The u32-aliased channel must NOT admit it.
1974        assert!(
1975            cache
1976                .check(subject.entity_id(), TokenScope::PUBLISH, h_b)
1977                .is_err(),
1978            "token bound to h_a must not authorize h_b that aliases on low 32 bits"
1979        );
1980    }
1981
1982    #[test]
1983    fn test_token_cache_wildcard() {
1984        let issuer = EntityKeypair::generate();
1985        let subject = EntityKeypair::generate();
1986
1987        let cache = TokenCache::new();
1988
1989        // Wildcard token: explicit WILDCARD scope bit.
1990        let token = PermissionToken::issue(
1991            &issuer,
1992            subject.entity_id().clone(),
1993            TokenScope::PUBLISH.union(TokenScope::WILDCARD),
1994            0,
1995            3600,
1996            0,
1997        );
1998        let _ = cache.insert(token);
1999
2000        // Should match any channel
2001        assert!(cache
2002            .check(subject.entity_id(), TokenScope::PUBLISH, 0xABCD)
2003            .is_ok());
2004        assert!(cache
2005            .check(subject.entity_id(), TokenScope::PUBLISH, 0x1234)
2006            .is_ok());
2007    }
2008
2009    // ---- Regression tests for Cubic AI findings ----
2010
2011    #[test]
2012    fn test_regression_wildcard_fallback_not_blocked_by_expired_channel_token() {
2013        // Regression: token.is_valid()? short-circuited on an expired
2014        // channel-specific token, preventing the wildcard fallback from
2015        // being reached.
2016        let issuer = EntityKeypair::generate();
2017        let subject = EntityKeypair::generate();
2018        let cache = TokenCache::new();
2019
2020        // Insert an expired channel-specific token. Mint with
2021        // TTL=1 (try_issue rejects 0), then backdate not_after to
2022        // force expiry.
2023        let mut expired_token = PermissionToken::issue(
2024            &issuer,
2025            subject.entity_id().clone(),
2026            TokenScope::PUBLISH,
2027            0xABCD,
2028            1,
2029            0,
2030        );
2031        // Force expiry by setting not_after to the past
2032        expired_token.not_after = 0;
2033        // Re-sign with the modified field
2034        let payload = expired_token.signed_payload();
2035        expired_token.signature = issuer.sign(&payload).to_bytes();
2036        cache.insert_unchecked(expired_token);
2037
2038        // Insert a valid wildcard token (explicit WILDCARD scope bit).
2039        let wildcard_token = PermissionToken::issue(
2040            &issuer,
2041            subject.entity_id().clone(),
2042            TokenScope::PUBLISH.union(TokenScope::WILDCARD),
2043            0,
2044            3600,
2045            0,
2046        );
2047        cache.insert_unchecked(wildcard_token);
2048
2049        // The wildcard should be reached despite the expired channel token
2050        assert!(
2051            cache
2052                .check(subject.entity_id(), TokenScope::PUBLISH, 0xABCD)
2053                .is_ok(),
2054            "wildcard fallback must not be blocked by expired channel-specific token"
2055        );
2056    }
2057
2058    #[test]
2059    fn test_regression_delegate_rejects_expired_parent() {
2060        // Regression: delegate() minted child tokens from an invalid parent
2061        // because it never called is_valid() on the parent.
2062        let root = EntityKeypair::generate();
2063        let node_a = EntityKeypair::generate();
2064        let node_b = EntityKeypair::generate();
2065
2066        let mut token = PermissionToken::issue(
2067            &root,
2068            node_a.entity_id().clone(),
2069            TokenScope::ALL,
2070            0,
2071            3600,
2072            2,
2073        );
2074        // Force expiry
2075        token.not_after = 0;
2076        let payload = token.signed_payload();
2077        token.signature = root.sign(&payload).to_bytes();
2078
2079        let result = token.delegate(&node_a, node_b.entity_id().clone(), TokenScope::PUBLISH);
2080        assert_eq!(
2081            result.unwrap_err(),
2082            TokenError::Expired,
2083            "delegation from expired parent must be rejected"
2084        );
2085    }
2086
2087    #[test]
2088    fn test_regression_insert_rejects_tampered_token() {
2089        // Regression: insert() accepted self-signed/tampered tokens
2090        // because it did not verify the signature.
2091        let issuer = EntityKeypair::generate();
2092        let subject = EntityKeypair::generate();
2093
2094        let mut token = PermissionToken::issue(
2095            &issuer,
2096            subject.entity_id().clone(),
2097            TokenScope::PUBLISH,
2098            0,
2099            3600,
2100            0,
2101        );
2102        // Tamper: change scope after signing
2103        token.scope = TokenScope::ADMIN;
2104
2105        let cache = TokenCache::new();
2106        assert!(
2107            cache.insert(token).is_err(),
2108            "insert must reject tampered token"
2109        );
2110        assert_eq!(cache.len(), 0, "tampered token must not be cached");
2111    }
2112
2113    // ========================================================================
2114    // Cubic-flagged P1/P2 regressions
2115    // ========================================================================
2116
2117    /// Regression for a cubic-flagged P1: TokenCache used to key on
2118    /// `(subject, channel_hash)` and store a single token per slot,
2119    /// so inserting a SUBSCRIBE token after a PUBLISH token
2120    /// silently overwrote the earlier one. Both must coexist.
2121    #[test]
2122    fn cache_coexists_tokens_of_different_scopes_for_same_channel() {
2123        let issuer = EntityKeypair::generate();
2124        let subject = EntityKeypair::generate();
2125        let channel = 0xABCD;
2126
2127        let publish_tok = PermissionToken::issue(
2128            &issuer,
2129            subject.entity_id().clone(),
2130            TokenScope::PUBLISH,
2131            channel,
2132            3600,
2133            0,
2134        );
2135        let subscribe_tok = PermissionToken::issue(
2136            &issuer,
2137            subject.entity_id().clone(),
2138            TokenScope::SUBSCRIBE,
2139            channel,
2140            3600,
2141            0,
2142        );
2143
2144        let cache = TokenCache::new();
2145        cache.insert(publish_tok).expect("insert publish");
2146        cache.insert(subscribe_tok).expect("insert subscribe");
2147
2148        // Both authorizations must pass — the second insert used to
2149        // clobber the first because the cache was keyed without
2150        // considering scope.
2151        assert!(
2152            cache
2153                .check(subject.entity_id(), TokenScope::PUBLISH, channel)
2154                .is_ok(),
2155            "publish auth lost after subscribe insert",
2156        );
2157        assert!(
2158            cache
2159                .check(subject.entity_id(), TokenScope::SUBSCRIBE, channel)
2160                .is_ok(),
2161            "subscribe auth lost",
2162        );
2163    }
2164
2165    /// Regression for a cubic-flagged P2: after the storage change
2166    /// from `PermissionToken` to `Vec<PermissionToken>` per slot,
2167    /// `TokenCache::len()` kept returning `self.tokens.len()` —
2168    /// the slot count, not the token count. FFI / binding metrics
2169    /// silently undercounted whenever a slot held more than one
2170    /// scope. This test exercises the multi-scope case: two tokens
2171    /// share a slot, so a slot count of 1 coexists with a token
2172    /// count of 2 — `len()` must report 2.
2173    #[test]
2174    fn cache_len_reports_total_tokens_not_slot_count() {
2175        let issuer = EntityKeypair::generate();
2176        let subject = EntityKeypair::generate();
2177        let channel = 0xFEED;
2178
2179        let cache = TokenCache::new();
2180        assert_eq!(cache.len(), 0);
2181
2182        // Two tokens, same (subject, channel) slot, different scopes
2183        // — coexist in one Vec per `insert_unchecked`.
2184        cache
2185            .insert(PermissionToken::issue(
2186                &issuer,
2187                subject.entity_id().clone(),
2188                TokenScope::PUBLISH,
2189                channel,
2190                3600,
2191                0,
2192            ))
2193            .expect("insert publish");
2194        cache
2195            .insert(PermissionToken::issue(
2196                &issuer,
2197                subject.entity_id().clone(),
2198                TokenScope::SUBSCRIBE,
2199                channel,
2200                3600,
2201                0,
2202            ))
2203            .expect("insert subscribe");
2204
2205        assert_eq!(
2206            cache.len(),
2207            2,
2208            "len() must sum per-slot Vec lengths — two scopes in one slot means two tokens",
2209        );
2210
2211        // A third token with a different channel lives in its own
2212        // slot, bumping both slot count and token count to 2 / 3.
2213        cache
2214            .insert(PermissionToken::issue(
2215                &issuer,
2216                subject.entity_id().clone(),
2217                TokenScope::PUBLISH,
2218                0xBEEF,
2219                3600,
2220                0,
2221            ))
2222            .expect("insert publish-other");
2223        assert_eq!(
2224            cache.len(),
2225            3,
2226            "len() after a second slot must reflect 3 tokens total, not 2 slots",
2227        );
2228    }
2229
2230    /// Regression for the other half of the cache semantic: issuing
2231    /// a SECOND token with the same scope as an existing one
2232    /// should **replace** it, not stack. Otherwise repeated refreshes
2233    /// leak linear memory.
2234    #[test]
2235    fn cache_same_scope_reinsert_replaces_not_stacks() {
2236        let issuer = EntityKeypair::generate();
2237        let subject = EntityKeypair::generate();
2238        let channel = 0xABCD;
2239
2240        let cache = TokenCache::new();
2241        for _ in 0..10 {
2242            let tok = PermissionToken::issue(
2243                &issuer,
2244                subject.entity_id().clone(),
2245                TokenScope::SUBSCRIBE,
2246                channel,
2247                3600,
2248                0,
2249            );
2250            cache.insert(tok).expect("insert");
2251        }
2252        // All ten had scope=SUBSCRIBE. The cache should hold one
2253        // entry total (the most recent), not ten.
2254        assert_eq!(
2255            cache.len(),
2256            1,
2257            "repeated inserts with the same scope must replace, not stack",
2258        );
2259    }
2260
2261    /// Regression for a cubic-flagged P2: `from_bytes` used to
2262    /// accept any buffer ≥ WIRE_SIZE, silently ignoring trailing
2263    /// bytes. Concatenated / corrupted payloads must fail cleanly.
2264    #[test]
2265    fn from_bytes_rejects_trailing_garbage() {
2266        let issuer = EntityKeypair::generate();
2267        let subject = EntityKeypair::generate();
2268        let tok = PermissionToken::issue(
2269            &issuer,
2270            subject.entity_id().clone(),
2271            TokenScope::PUBLISH,
2272            0,
2273            3600,
2274            0,
2275        );
2276        let mut bytes = tok.to_bytes();
2277        assert_eq!(bytes.len(), PermissionToken::WIRE_SIZE);
2278        // Fresh bytes parse fine.
2279        assert!(PermissionToken::from_bytes(&bytes).is_ok());
2280
2281        // Append trailing garbage — parser must now refuse.
2282        bytes.push(0xFF);
2283        assert!(
2284            matches!(
2285                PermissionToken::from_bytes(&bytes),
2286                Err(TokenError::InvalidFormat)
2287            ),
2288            "trailing byte must reject as InvalidFormat",
2289        );
2290
2291        // Truncate by one — also refused (already was, but lock in).
2292        let truncated = &tok.to_bytes()[..PermissionToken::WIRE_SIZE - 1];
2293        assert!(matches!(
2294            PermissionToken::from_bytes(truncated),
2295            Err(TokenError::InvalidFormat)
2296        ));
2297    }
2298
2299    /// Security audit H3: an unbounded TTL is rejected at issue time.
2300    ///
2301    /// Pre-fix `issue()` saturated `now + u64::MAX` into a token whose
2302    /// `not_after == u64::MAX` — a never-expiring credential retirable
2303    /// only via the advisory revocation floor. `try_issue` now returns
2304    /// `TtlTooLong` for any `duration_secs > MAX_TOKEN_TTL_SECS`, and
2305    /// the panicking `issue` wrapper turns that into a clear panic.
2306    #[test]
2307    fn issue_rejects_ttl_above_max() {
2308        let issuer = EntityKeypair::generate();
2309        let subject = EntityKeypair::generate();
2310
2311        // u64::MAX (the old "immortal token" input) is rejected.
2312        assert!(matches!(
2313            PermissionToken::try_issue(
2314                &issuer,
2315                subject.entity_id().clone(),
2316                TokenScope::PUBLISH,
2317                0,
2318                u64::MAX,
2319                0,
2320            ),
2321            Err(TokenError::TtlTooLong)
2322        ));
2323
2324        // One second past the ceiling is rejected.
2325        assert!(matches!(
2326            PermissionToken::try_issue(
2327                &issuer,
2328                subject.entity_id().clone(),
2329                TokenScope::PUBLISH,
2330                0,
2331                MAX_TOKEN_TTL_SECS + 1,
2332                0,
2333            ),
2334            Err(TokenError::TtlTooLong)
2335        ));
2336
2337        // Exactly the ceiling is accepted and produces a bounded,
2338        // non-saturated expiry.
2339        let tok = PermissionToken::try_issue(
2340            &issuer,
2341            subject.entity_id().clone(),
2342            TokenScope::PUBLISH,
2343            0,
2344            MAX_TOKEN_TTL_SECS,
2345            0,
2346        )
2347        .expect("max-TTL token must issue");
2348        assert!(tok.not_after < u64::MAX, "expiry must not saturate");
2349        assert!(tok.is_valid().is_ok());
2350        assert!(tok.verify().is_ok());
2351    }
2352
2353    /// The panicking `issue` wrapper surfaces an over-long TTL as a
2354    /// panic rather than minting an immortal token.
2355    #[test]
2356    #[should_panic(expected = "MAX_TOKEN_TTL_SECS")]
2357    fn issue_panics_on_ttl_above_max() {
2358        let issuer = EntityKeypair::generate();
2359        let subject = EntityKeypair::generate();
2360        let _ = PermissionToken::issue(
2361            &issuer,
2362            subject.entity_id().clone(),
2363            TokenScope::PUBLISH,
2364            0,
2365            u64::MAX,
2366            0,
2367        );
2368    }
2369
2370    /// Security audit M2: `TokenCache` clock-skew tolerance is clamped
2371    /// to `MAX_TOKEN_CLOCK_SKEW_SECS` so a misconfiguration can't widen
2372    /// every token's validity window without bound.
2373    #[test]
2374    fn clock_skew_is_clamped_to_max() {
2375        // Constructor clamps.
2376        let cache = TokenCache::with_clock_skew(u64::MAX);
2377        assert_eq!(cache.clock_skew_secs(), MAX_TOKEN_CLOCK_SKEW_SECS);
2378
2379        // Setter clamps.
2380        let mut cache = TokenCache::new();
2381        cache.set_clock_skew(u64::MAX);
2382        assert_eq!(cache.clock_skew_secs(), MAX_TOKEN_CLOCK_SKEW_SECS);
2383
2384        // In-range values pass through unchanged.
2385        let cache = TokenCache::with_clock_skew(TOKEN_CLOCK_SKEW_SECS_RECOMMENDED);
2386        assert_eq!(cache.clock_skew_secs(), TOKEN_CLOCK_SKEW_SECS_RECOMMENDED);
2387    }
2388
2389    /// Regression for a cubic-flagged P2: `delegate()` computed the
2390    /// child's TTL as `parent.not_after - current_timestamp()` and
2391    /// then passed that duration back through `issue()`, which
2392    /// re-reads `current_timestamp()`. When the parent was close
2393    /// to expiry the double-read shaved meaningful lifetime off
2394    /// the child — in the worst case, a child token born already
2395    /// expired. The fix copies `parent.not_after` directly.
2396    #[test]
2397    fn delegate_preserves_parent_not_after() {
2398        let a = EntityKeypair::generate();
2399        let b = EntityKeypair::generate();
2400        let c = EntityKeypair::generate();
2401
2402        let parent = PermissionToken::issue(
2403            &a,
2404            b.entity_id().clone(),
2405            TokenScope::PUBLISH.union(TokenScope::DELEGATE),
2406            0,
2407            3600,
2408            2,
2409        );
2410
2411        let child = parent
2412            .delegate(&b, c.entity_id().clone(), TokenScope::PUBLISH)
2413            .expect("delegate");
2414
2415        assert_eq!(
2416            child.not_after, parent.not_after,
2417            "child's not_after must equal parent's, not some smaller value \
2418             derived from a second clock read",
2419        );
2420        // child.not_before was stamped by the child's own clock
2421        // read, so it's ≥ parent.not_before — which is correct.
2422        assert!(child.not_before >= parent.not_before);
2423        assert!(child.verify().is_ok());
2424    }
2425
2426    // ========================================================================
2427    // TEST_COVERAGE_PLAN §P2-9 — TokenCache concurrency safety.
2428    //
2429    // The cache is a DashMap, so entry-level writes are atomic,
2430    // but the mesh-side usage pattern runs insert / check /
2431    // evict_expired on the same entry from three different
2432    // tokio tasks under load. These tests pin: no panic, no
2433    // torn reads, terminal state coherent.
2434    // ========================================================================
2435
2436    /// Concurrent `insert_unchecked` (authorize) + `check`
2437    /// (authorize-gate) + `evict_expired` (sweep) on the same
2438    /// subject+channel must not panic or produce an inconsistent
2439    /// terminal state. The observer thread's `check` must always
2440    /// return a deterministic `Ok(())` or `Err(NotAuthorized)`
2441    /// — never a corrupted DashMap state (which would manifest
2442    /// as a panic inside `iter().any(...)`).
2443    #[test]
2444    fn concurrent_insert_check_evict_is_panic_free() {
2445        use std::sync::{Arc, Barrier};
2446        use std::thread;
2447
2448        let cache = Arc::new(TokenCache::new());
2449        let issuer = EntityKeypair::generate();
2450        let subject_kp = EntityKeypair::generate();
2451        let subject_id = subject_kp.entity_id().clone();
2452        let channel_hash: ChannelHash = 0xABCD;
2453        let iters = 500u32;
2454        // Start barrier — without it thread scheduling can let
2455        // the evictor run its whole loop before the inserter
2456        // even starts, trivializing the race.
2457        let start = Arc::new(Barrier::new(3));
2458
2459        // Inserter: re-issue + replace the token on each
2460        // iteration. Each insert overwrites the previous entry
2461        // (same scope → `insert_unchecked`'s `iter_mut().find`
2462        // path replaces rather than pushes).
2463        let inserter = {
2464            let cache = cache.clone();
2465            let issuer = issuer.clone();
2466            let subject_id = subject_id.clone();
2467            let start = start.clone();
2468            thread::spawn(move || {
2469                start.wait();
2470                for _ in 0..iters {
2471                    let token = PermissionToken::issue(
2472                        &issuer,
2473                        subject_id.clone(),
2474                        TokenScope::SUBSCRIBE,
2475                        channel_hash,
2476                        300,
2477                        0,
2478                    );
2479                    cache.insert_unchecked(token);
2480                }
2481            })
2482        };
2483
2484        // Checker: gate queries fire on the hot path. Must not
2485        // panic, must return a deterministic Result.
2486        let checker = {
2487            let cache = cache.clone();
2488            let subject_id = subject_id.clone();
2489            let start = start.clone();
2490            thread::spawn(move || {
2491                start.wait();
2492                for _ in 0..iters {
2493                    let _ = cache.check(&subject_id, TokenScope::SUBSCRIBE, channel_hash);
2494                }
2495            })
2496        };
2497
2498        // Evictor: periodic sweep. `evict_expired` walks every
2499        // slot and retains only not-yet-expired tokens; with
2500        // 300 s TTLs and a sub-second test, no tokens expire so
2501        // no entries should actually be removed, but the retain
2502        // closure must run safely against the writer.
2503        let evictor = {
2504            let cache = cache.clone();
2505            let start = start.clone();
2506            thread::spawn(move || {
2507                start.wait();
2508                for _ in 0..iters {
2509                    cache.evict_expired();
2510                }
2511            })
2512        };
2513
2514        inserter.join().expect("inserter panicked");
2515        checker.join().expect("checker panicked");
2516        evictor.join().expect("evictor panicked");
2517
2518        // Terminal state: exactly one token present for this
2519        // (subject, channel_hash, SUBSCRIBE) slot. The inserter
2520        // replaced on every iteration — the final token must
2521        // be valid, and `check` must return Ok(()) against it.
2522        assert!(
2523            cache
2524                .check(&subject_id, TokenScope::SUBSCRIBE, channel_hash)
2525                .is_ok(),
2526            "terminal check must succeed — the last insert's token is unexpired",
2527        );
2528        assert_eq!(
2529            cache.len(),
2530            1,
2531            "exactly one token should remain (same-scope replace path); got {}",
2532            cache.len(),
2533        );
2534    }
2535
2536    /// A token that expires mid-test must be dropped by a
2537    /// concurrent `evict_expired`. The checker's `check` must
2538    /// return `Ok(())` while the token is still valid and
2539    /// consistently `Err(NotAuthorized)` after eviction, never
2540    /// a panic from a retain that ran mid-iter.
2541    #[test]
2542    fn evict_expired_races_with_check_without_panic() {
2543        use std::sync::{Arc, Barrier};
2544        use std::thread;
2545        use std::time::Duration;
2546
2547        let cache = Arc::new(TokenCache::new());
2548        let issuer = EntityKeypair::generate();
2549        let subject_kp = EntityKeypair::generate();
2550        let subject_id = subject_kp.entity_id().clone();
2551        let channel_hash: ChannelHash = 0xBEEF;
2552
2553        // Short-lived token: 3 s TTL. Insert it then let it
2554        // expire naturally during the race.
2555        //
2556        // `current_timestamp` is second-resolution, so a 1 s TTL has
2557        // a ~1 s race window: insert at the very end of second T can
2558        // produce `not_after == T + 1` while the immediate post-
2559        // insert check already runs in second T + 1 (`now >=
2560        // not_after` → Expired). 3 s of headroom keeps the pre-
2561        // expiry check robust without lengthening the race body.
2562        let token = PermissionToken::issue(
2563            &issuer,
2564            subject_id.clone(),
2565            TokenScope::PUBLISH,
2566            channel_hash,
2567            3, // 3-second TTL
2568            0,
2569        );
2570        cache.insert_unchecked(token);
2571        assert!(
2572            cache
2573                .check(&subject_id, TokenScope::PUBLISH, channel_hash)
2574                .is_ok(),
2575            "pre-expiry check should succeed",
2576        );
2577
2578        let start = Arc::new(Barrier::new(2));
2579        let checker = {
2580            let cache = cache.clone();
2581            let subject_id = subject_id.clone();
2582            let start = start.clone();
2583            thread::spawn(move || {
2584                start.wait();
2585                for _ in 0..2_000 {
2586                    // Outcome may transition from Ok → Err
2587                    // exactly once during this loop as the TTL
2588                    // elapses. Either result is valid; panic
2589                    // is not.
2590                    let _ = cache.check(&subject_id, TokenScope::PUBLISH, channel_hash);
2591                }
2592            })
2593        };
2594        let evictor = {
2595            let cache = cache.clone();
2596            let start = start.clone();
2597            thread::spawn(move || {
2598                start.wait();
2599                for _ in 0..2_000 {
2600                    cache.evict_expired();
2601                }
2602            })
2603        };
2604
2605        // Wait for TTL to elapse. `current_timestamp` is
2606        // second-resolution, so 3.5 s of wall clock guarantees
2607        // `not_after` < `now` for the 3 s TTL above.
2608        thread::sleep(Duration::from_millis(3_500));
2609
2610        checker.join().expect("checker panicked");
2611        evictor.join().expect("evictor panicked");
2612
2613        // Terminal: a fresh evict + check — the token's TTL
2614        // has expired and the evictor swept at least once since,
2615        // so check must return NotAuthorized.
2616        cache.evict_expired();
2617        match cache.check(&subject_id, TokenScope::PUBLISH, channel_hash) {
2618            Err(TokenError::NotAuthorized) => {}
2619            other => panic!("expected NotAuthorized after TTL + evict; got {other:?}"),
2620        }
2621    }
2622
2623    // ========================================================================
2624    // TokenCache must bound slot growth and within-slot growth
2625    // ========================================================================
2626
2627    /// Helper: build a token whose subject is the bytes of `subject_seed`
2628    /// padded into an EntityId, on `channel_hash`. We bypass the
2629    /// `insert(...)` signature-verify path by issuing real tokens —
2630    /// this is a fast-enough way to get many distinct subjects.
2631    fn issue_token_for(seed: u64, channel_hash: ChannelHash, scope: TokenScope) -> PermissionToken {
2632        let issuer = EntityKeypair::generate();
2633        // EntityKeypair::generate uses entropy; we just need many
2634        // distinct subjects, so a per-iteration generate is fine
2635        // (the test caps iteration counts).
2636        let _ = seed;
2637        let subject = EntityKeypair::generate();
2638        PermissionToken::issue(
2639            &issuer,
2640            subject.entity_id().clone(),
2641            scope,
2642            channel_hash,
2643            3600,
2644            0,
2645        )
2646    }
2647
2648    /// Once `MAX_TOKEN_SLOTS` distinct slot keys are present,
2649    /// further `insert_unchecked` calls with NOVEL keys must NOT
2650    /// admit a new slot. Existing-slot refresh paths still work
2651    /// (covered by `replays_existing_subject_when_slot_cap_is_full`
2652    /// below). Pre-fix the cache grew linearly with peer-supplied
2653    /// `(subject, channel_hash)` cardinality.
2654    ///
2655    /// Setup uses a single subject and varies `channel_hash` so
2656    /// the slot keys differ — this avoids spending O(slots) ed25519
2657    /// keypair generations.
2658    #[test]
2659    fn insert_unchecked_drops_novel_slot_when_at_max_token_slots() {
2660        let issuer = EntityKeypair::generate();
2661        let subject = EntityKeypair::generate();
2662        let cache = TokenCache::new();
2663
2664        // Fill the cache to capacity using the same subject with
2665        // varying channel_hash. `MAX_TOKEN_SLOTS` is 65_536; channel
2666        // hash is `ChannelHash` (u64, ~18 EB distinct values), so we
2667        // pack the cache to capacity by varying the low bits.
2668        // Building 65_536 PermissionTokens would do 65_536 ed25519
2669        // signs, which is too slow for a unit test — instead we
2670        // pre-build one template and clone with mutated
2671        // channel_hash. The signature stops being valid after the
2672        // mutation, but `insert_unchecked` skips verify, so the
2673        // cache shape under test is identical to the real path.
2674        let template = PermissionToken::issue(
2675            &issuer,
2676            subject.entity_id().clone(),
2677            TokenScope::PUBLISH,
2678            0,
2679            3600,
2680            0,
2681        );
2682        for ch in 0u32..MAX_TOKEN_SLOTS as u32 {
2683            let mut t = template.clone();
2684            t.channel_hash = ch as ChannelHash;
2685            cache.insert_unchecked(t);
2686        }
2687        let len_before_overflow = cache.tokens.len();
2688        assert_eq!(
2689            len_before_overflow, MAX_TOKEN_SLOTS,
2690            "test setup: cache must be filled to capacity",
2691        );
2692
2693        // A NOVEL slot key — different subject, channel_hash=0 — must
2694        // be dropped at the cap.
2695        let other_subject = EntityKeypair::generate();
2696        let novel = PermissionToken::issue(
2697            &issuer,
2698            other_subject.entity_id().clone(),
2699            TokenScope::PUBLISH,
2700            0,
2701            3600,
2702            0,
2703        );
2704        cache.insert_unchecked(novel);
2705
2706        assert_eq!(
2707            cache.tokens.len(),
2708            MAX_TOKEN_SLOTS,
2709            "novel slot must be rejected at MAX_TOKEN_SLOTS cap",
2710        );
2711    }
2712
2713    /// At capacity, refreshing an EXISTING slot key (same subject +
2714    /// same channel_hash) must still succeed — we only refuse novel
2715    /// keys. Pins that the cap doesn't accidentally lock out
2716    /// legitimate token refreshes once a peer-driven flood has filled
2717    /// the cache.
2718    #[test]
2719    fn insert_unchecked_replays_existing_subject_when_slot_cap_is_full() {
2720        let issuer = EntityKeypair::generate();
2721        let subject = EntityKeypair::generate();
2722        let cache = TokenCache::new();
2723
2724        // Fill to capacity.
2725        let template = PermissionToken::issue(
2726            &issuer,
2727            subject.entity_id().clone(),
2728            TokenScope::PUBLISH,
2729            0,
2730            3600,
2731            0,
2732        );
2733        for ch in 0u32..MAX_TOKEN_SLOTS as u32 {
2734            let mut t = template.clone();
2735            t.channel_hash = ch as ChannelHash;
2736            cache.insert_unchecked(t);
2737        }
2738        assert_eq!(cache.tokens.len(), MAX_TOKEN_SLOTS);
2739
2740        // Refresh an existing slot (subject + channel_hash=42 already
2741        // present). Must succeed — same scope replaces same scope.
2742        let mut refresh = template.clone();
2743        refresh.channel_hash = 42;
2744        refresh.nonce = 9999;
2745        cache.insert_unchecked(refresh);
2746
2747        assert_eq!(cache.tokens.len(), MAX_TOKEN_SLOTS, "slot count unchanged");
2748        let slot = cache
2749            .tokens
2750            .get(&(*subject.entity_id().as_bytes(), 42 as ChannelHash))
2751            .unwrap();
2752        assert_eq!(slot.value().len(), 1, "still one token in slot");
2753        assert_eq!(slot.value()[0].nonce, 9999, "refresh replaced the token");
2754    }
2755
2756    /// Within a single slot, novel scope bitfields stack up to
2757    /// `MAX_TOKENS_PER_SLOT`; beyond that the new-scope path drops
2758    /// silently. Refreshing an existing scope still wins.
2759    #[test]
2760    fn insert_unchecked_caps_within_slot_token_count() {
2761        let issuer = EntityKeypair::generate();
2762        let subject = EntityKeypair::generate();
2763        let cache = TokenCache::new();
2764
2765        // Pack the slot with MAX_TOKENS_PER_SLOT distinct-scope
2766        // tokens. We use the bitfield directly via from_bits to
2767        // produce many distinct scope values cheaply. Each token
2768        // stays at the same (subject, channel_hash) so they share
2769        // a slot.
2770        let channel: ChannelHash = 0xCAFE;
2771        let template = PermissionToken::issue(
2772            &issuer,
2773            subject.entity_id().clone(),
2774            TokenScope::PUBLISH,
2775            channel,
2776            3600,
2777            0,
2778        );
2779        for i in 0..MAX_TOKENS_PER_SLOT as u32 {
2780            let mut t = template.clone();
2781            // Vary the high bits so each scope value is distinct
2782            // AND has the WILDCARD bit (0b1_0000 / 0x10) consistently
2783            // *un*set — otherwise tokens with WILDCARD set would
2784            // route to `slot_channel = 0` and split off into a
2785            // different slot, dodging the within-slot cap test.
2786            // Shift `i` past the WILDCARD bit so it never
2787            // accidentally lights up.
2788            t.scope = TokenScope::from_bits(0x10_0000 | (i << 8));
2789            cache.insert_unchecked(t);
2790        }
2791        let slot_before = cache
2792            .tokens
2793            .get(&(*subject.entity_id().as_bytes(), channel))
2794            .unwrap();
2795        assert_eq!(
2796            slot_before.value().len(),
2797            MAX_TOKENS_PER_SLOT,
2798            "test setup: slot must be packed to within-slot cap",
2799        );
2800        drop(slot_before); // release DashMap ref before the next op
2801
2802        // A token with a NOVEL scope bitfield must be dropped.
2803        // Use a value that ALSO doesn't set the WILDCARD bit so it
2804        // routes to the same slot as the packed entries.
2805        let novel_scope_bits = 0x20_0000u32;
2806        let mut over = template.clone();
2807        over.scope = TokenScope::from_bits(novel_scope_bits);
2808        cache.insert_unchecked(over);
2809        let slot_after = cache
2810            .tokens
2811            .get(&(*subject.entity_id().as_bytes(), channel))
2812            .unwrap();
2813        assert_eq!(
2814            slot_after.value().len(),
2815            MAX_TOKENS_PER_SLOT,
2816            "novel scope must be rejected at MAX_TOKENS_PER_SLOT",
2817        );
2818        assert!(
2819            slot_after
2820                .value()
2821                .iter()
2822                .all(|t| t.scope.bits() != novel_scope_bits),
2823            "the dropped scope must not be present in the slot",
2824        );
2825
2826        // Refresh of an EXISTING scope still wins, even at cap.
2827        let _ = issue_token_for; // silence unused warning if future tests don't need it
2828        drop(slot_after);
2829        // Refresh the i=0 entry — its scope was 0x10_0000.
2830        let mut refresh = template.clone();
2831        refresh.scope = TokenScope::from_bits(0x10_0000);
2832        refresh.nonce = 1111;
2833        cache.insert_unchecked(refresh);
2834        let slot_after_refresh = cache
2835            .tokens
2836            .get(&(*subject.entity_id().as_bytes(), channel))
2837            .unwrap();
2838        let refreshed = slot_after_refresh
2839            .value()
2840            .iter()
2841            .find(|t| t.scope.bits() == 0x10_0000)
2842            .expect("scope 0x10_0000 must still be present");
2843        assert_eq!(
2844            refreshed.nonce, 1111,
2845            "refresh-of-existing-scope must succeed at cap"
2846        );
2847    }
2848
2849    /// Concurrent novel-key inserts must NOT overshoot
2850    /// `MAX_TOKEN_SLOTS`. Pre-fix the path was:
2851    ///
2852    /// ```ignore
2853    /// if !contains_key(&key) && len() >= cap { return; }
2854    /// entry(key).or_default()...
2855    /// ```
2856    ///
2857    /// N threads could all observe `len() < cap` simultaneously and
2858    /// each go on to `or_default()` a fresh entry — overshoot
2859    /// proportional to N (bounded only by concurrency, NOT by
2860    /// `DashMap` shard count as the prior comment claimed). Under a
2861    /// peer-driven token flood across a multi-core daemon, this
2862    /// uncaps the cache.
2863    ///
2864    /// Prefill the DashMap directly (bypassing the expensive
2865    /// PermissionToken::issue ed25519-sign + clone pipeline) to
2866    /// `MAX_TOKEN_SLOTS - SLACK`, then run `THREADS` concurrent
2867    /// `insert_unchecked` calls each carrying a distinct novel key.
2868    /// After the dust settles, the cache must hold at most
2869    /// `MAX_TOKEN_SLOTS` — the slack lets a few inserts succeed
2870    /// (correct) while the rest must roll back (the gate).
2871    #[test]
2872    fn insert_unchecked_does_not_overshoot_under_concurrent_novel_inserts() {
2873        use std::sync::Arc;
2874        use std::thread;
2875
2876        const SLACK: usize = 4;
2877        const THREADS: usize = 32;
2878
2879        let issuer = EntityKeypair::generate();
2880        let subject = EntityKeypair::generate();
2881        let cache = Arc::new(TokenCache::new());
2882
2883        // Build one template token (one ed25519 sign) and seed
2884        // `MAX_TOKEN_SLOTS - SLACK` slot keys directly into the
2885        // backing `DashMap`. We aren't testing what the prefill
2886        // path does — we're testing the cap gate against a single
2887        // wave of concurrent novel inserts, so prefill speed
2888        // matters, not prefill semantics.
2889        let template = PermissionToken::issue(
2890            &issuer,
2891            subject.entity_id().clone(),
2892            TokenScope::PUBLISH,
2893            0,
2894            3600,
2895            0,
2896        );
2897        let prefill = MAX_TOKEN_SLOTS - SLACK;
2898        for ch in 0u32..prefill as u32 {
2899            let mut t = template.clone();
2900            t.channel_hash = ch as ChannelHash;
2901            cache.tokens.insert(
2902                (*subject.entity_id().as_bytes(), ch as ChannelHash),
2903                vec![t],
2904            );
2905        }
2906        assert_eq!(cache.tokens.len(), prefill);
2907
2908        // Each thread inserts a unique novel key (different subject
2909        // bytes — synthesized directly so we don't pay an ed25519
2910        // generate per thread). The race is around the gate's
2911        // len()-check vs entry-insert.
2912        let barrier = Arc::new(std::sync::Barrier::new(THREADS));
2913        let mut handles = Vec::with_capacity(THREADS);
2914        for tid in 0..THREADS {
2915            let cache = Arc::clone(&cache);
2916            let mut novel = template.clone();
2917            // Synthesize a novel subject by mutating the bytes —
2918            // identity verification is bypassed by `insert_unchecked`.
2919            let mut subj_bytes = *subject.entity_id().as_bytes();
2920            subj_bytes[0] ^= (tid as u8).wrapping_add(1);
2921            subj_bytes[1] ^= ((tid >> 8) as u8).wrapping_add(1);
2922            novel.subject = EntityId::from_bytes(subj_bytes);
2923            novel.channel_hash = (prefill + tid) as ChannelHash;
2924            let barrier = Arc::clone(&barrier);
2925            handles.push(thread::spawn(move || {
2926                barrier.wait();
2927                cache.insert_unchecked(novel);
2928            }));
2929        }
2930        for h in handles {
2931            h.join().unwrap();
2932        }
2933
2934        // The strong invariant: NEVER exceed cap. Pre-fix this would
2935        // overshoot to up to `prefill + THREADS = cap - SLACK + THREADS`.
2936        let final_len = cache.tokens.len();
2937        assert!(
2938            final_len <= MAX_TOKEN_SLOTS,
2939            "cache overshot cap under concurrent novel inserts: {final_len} > {MAX_TOKEN_SLOTS}",
2940        );
2941        // Sanity: at least the prefill survives — concurrent inserts
2942        // must never remove pre-existing slots.
2943        assert!(
2944            final_len >= prefill,
2945            "prefill leaked: {final_len} < {prefill}",
2946        );
2947    }
2948
2949    // ========================================================================
2950    // try_issue / delegate must NOT panic on public-only keypair
2951    // ========================================================================
2952
2953    /// `try_issue` returns `TokenError::ReadOnly` instead of
2954    /// panicking when the issuer keypair is public-only (e.g.
2955    /// post-migration zeroize). FFI bindings route through this
2956    /// to avoid panic-unwinding across `extern "C"`.
2957    #[test]
2958    fn try_issue_returns_read_only_on_public_only_keypair() {
2959        let full = EntityKeypair::generate();
2960        // Build a public-only sibling that shares the same entity_id.
2961        let public_only = EntityKeypair::public_only(full.entity_id().clone());
2962        assert!(public_only.try_sign(b"x").is_err());
2963
2964        let subject = EntityKeypair::generate();
2965        let result = PermissionToken::try_issue(
2966            &public_only,
2967            subject.entity_id().clone(),
2968            TokenScope::PUBLISH,
2969            0,
2970            3600,
2971            0,
2972        );
2973        assert!(
2974            matches!(result, Err(TokenError::ReadOnly)),
2975            "try_issue must surface public-only keypair as ReadOnly, got {:?}",
2976            result.map(|_| "Ok"),
2977        );
2978    }
2979
2980    /// `delegate` likewise surfaces a public-only signer as
2981    /// `TokenError::ReadOnly`. The original `delegate` already
2982    /// returns `Result`, so no API change was needed — only the
2983    /// internal `sign` call was switched to `try_sign`.
2984    #[test]
2985    fn delegate_returns_read_only_on_public_only_signer() {
2986        let issuer = EntityKeypair::generate();
2987        let subject_full = EntityKeypair::generate();
2988        let target = EntityKeypair::generate();
2989
2990        let parent = PermissionToken::issue(
2991            &issuer,
2992            subject_full.entity_id().clone(),
2993            TokenScope::PUBLISH.union(TokenScope::DELEGATE),
2994            0xCAFE,
2995            3600,
2996            3,
2997        );
2998
2999        // Subject becomes public-only (post-migration zeroize).
3000        let subject_pub = EntityKeypair::public_only(subject_full.entity_id().clone());
3001        let result = parent.delegate(
3002            &subject_pub,
3003            target.entity_id().clone(),
3004            TokenScope::PUBLISH,
3005        );
3006        assert!(
3007            matches!(result, Err(TokenError::ReadOnly)),
3008            "delegate must surface public-only signer as ReadOnly, got {:?}",
3009            result.map(|_| "Ok"),
3010        );
3011    }
3012
3013    /// `try_issue` succeeds with a full keypair — pins the success
3014    /// path so a future tightening doesn't accidentally over-reject.
3015    #[test]
3016    fn try_issue_succeeds_with_full_keypair() {
3017        let issuer = EntityKeypair::generate();
3018        let subject = EntityKeypair::generate();
3019        let token = PermissionToken::try_issue(
3020            &issuer,
3021            subject.entity_id().clone(),
3022            TokenScope::PUBLISH,
3023            0,
3024            3600,
3025            0,
3026        )
3027        .expect("try_issue must succeed with a full keypair");
3028        assert!(token.verify().is_ok());
3029    }
3030}