pas-external 4.0.2

Ppoppo Accounts System (PAS) external SDK -- OAuth2 PKCE, PASETO verification, Axum middleware, session liveness
Documentation
//! Session liveness verification against PAS.
//!
//! Consumers who treat PAS as the single source of truth for session
//! validity call [`attempt_liveness_refresh`] periodically (e.g., once
//! 15 minutes have passed since the session was last verified). The
//! helper handles the decrypt → call PAS → re-encrypt-if-rotated
//! sequence and returns a [`LivenessOutcome`] that cleanly splits "keep
//! trusting the cache", "drop the session", and "PAS is shaky, serve
//! cache for now".
//!
//! # Why three outcomes
//!
//! A two-state "alive / dead" classification is too coarse in production:
//! a transient PAS outage (5xx, network hiccup) would force-logout every
//! active user every 15 minutes. Splitting transient out lets the
//! consumer serve the most recently verified cache through a PAS blip.
//!
//! The inverse — treating every error as transient — is also unsafe:
//! `invalid_grant` (4xx) is PAS's way of saying "this refresh_token is
//! dead, stop honoring the session." Masking that as transient leaves
//! revoked sessions live.
//!
//! # Cause variants
//!
//! Both [`LivenessFailure`] variants carry a cause ([`RevokeCause`] /
//! [`TransientCause`]) so consumers can log *why* without the SDK
//! having to emit its own tracing events. A `Revoked { cause:
//! CipherFailure }` must be investigated (operator action — key
//! rotation accident, DB tamper) while `Revoked { cause: PasRejected }`
//! is routine (user logged out elsewhere). Conflating the two masks
//! real incidents.
//!
//! # What the SDK does NOT do
//!
//! - It does not persist `last_verified_at` or `revoked_at`. The consumer
//!   decides the schema and writes to it after observing the outcome.
//! - It does not decide when to run the check. The consumer gates on its
//!   own "stale?" predicate before calling [`attempt_liveness_refresh`].
//! - It does not log. The consumer logs with its own correlation IDs
//!   (session_id, user_id) and the returned cause.

use std::time::Duration;

use super::cipher::TokenCipher;
use crate::error::Error;
use crate::oauth::{AuthClient, TokenResponse};

/// Back-off hint attached to every transient failure this module
/// produces. It is a suggestion the consumer may pass on to client
/// retry logic — not a timeout.
const DEFAULT_TRANSIENT_RETRY_AFTER: Duration = Duration::from_millis(2_000);

/// Why a session was revoked.
///
/// `#[non_exhaustive]` — new causes may be added (e.g., future PAS
/// revocation reasons) without a breaking change. Match with a trailing
/// `_ => {}` arm.
///
/// Implements `Hash` in addition to `Eq` so a cause can be used
/// directly as a `HashMap` / `HashSet` key (per-cause counters,
/// de-duplication of alerts).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum RevokeCause {
    /// The stored ciphertext could not be decrypted. Indicates a local
    /// issue (key rotation without re-encrypt, DB tamper, column
    /// corruption). Operators should investigate — this is *not* a
    /// routine user-action revocation.
    CipherFailure,
    /// PAS returned a permanent OAuth error (4xx). User logged out on
    /// another device, admin revocation, or refresh_token expired.
    /// Routine — no operator action required.
    PasRejected,
}

/// Why a liveness attempt was classified transient.
///
/// `#[non_exhaustive]` — new causes may be added without a breaking
/// change. Match with a trailing `_ => {}` arm.
///
/// Implements `Hash` in addition to `Eq` so a cause can be used
/// directly as a `HashMap` / `HashSet` key (per-cause counters,
/// de-duplication of alerts).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum TransientCause {
    /// PAS returned a 5xx response. PAS-side degradation; next request
    /// will retry.
    PasServerError,
    /// HTTP transport layer error (timeout, connect, TLS).
    Transport,
    /// Encrypting a *rotated* refresh_token failed after PAS already
    /// confirmed liveness. Local infrastructure issue; the session
    /// remains authenticated using the previously stored ciphertext.
    CipherEncryptFailed,
    /// Unclassifiable error shape (e.g., OAuth response with no status,
    /// unexpected error variant). Fail-safe default — the S-L3
    /// invariant ("transient failures must not force-logout") means
    /// every error we cannot confidently classify as permanent must be
    /// served from cache.
    Unknown,
}

/// A liveness attempt that did not confirm the session is fresh.
///
/// Returned by [`classify_refresh_error`] and embedded in
/// [`LivenessOutcome::Failed`].
///
/// Implements `Clone`, `PartialEq` and `Eq` so consumers can compare
/// failures with `assert_eq!` in their own tests and keep a copy for
/// logging after handing the original on.
#[derive(Debug, Clone, PartialEq, Eq)]
#[must_use]
pub enum LivenessFailure {
    /// PAS rejected the refresh_token, or the SDK cannot recover it.
    /// Consumer should mark the session revoked and drop the auth
    /// context.
    Revoked { cause: RevokeCause },
    /// PAS is temporarily unreachable or the SDK hit a local
    /// infrastructure issue. Consumer should serve the cached session.
    /// `retry_after` is a back-off hint the consumer may forward; it
    /// is not an obligation.
    Transient {
        retry_after: Option<Duration>,
        cause: TransientCause,
    },
}

/// Outcome of a single PAS liveness round-trip.
///
/// Split from [`LivenessFailure`] so the classifier (which can never
/// produce `Fresh`) has a narrower return type and so consumers that
/// want to handle all failure modes uniformly can match
/// `LivenessOutcome::Failed(_)` once. Intentionally *not*
/// `#[non_exhaustive]`: the Fresh-vs-Failed split is a complete
/// categorization; forcing `_ => {}` arms would sacrifice exhaustiveness
/// checking for no real gain.
///
/// Implements `Clone`, `PartialEq` and `Eq` for the same reason as
/// [`LivenessFailure`]: whole outcomes can be compared with
/// `assert_eq!`. Equality on `Fresh` compares the ciphertext strings.
#[derive(Debug, Clone, PartialEq, Eq)]
#[must_use]
pub enum LivenessOutcome {
    /// Session was reconfirmed against PAS. If `rotated_ciphertext` is
    /// `Some(ct)`, persist that as the new stored ciphertext (PAS
    /// rotated). If `None`, the existing ciphertext is still valid.
    /// Always update `last_verified_at`.
    Fresh { rotated_ciphertext: Option<String> },
    /// See [`LivenessFailure`].
    Failed(LivenessFailure),
}

/// Map a PAS [`Error`] from [`AuthClient::refresh_token`] into a
/// [`LivenessFailure`].
///
/// Classification:
///
/// | Source | Variant | Cause |
/// |--------|---------|-------|
/// | OAuth 4xx | `Revoked` | `PasRejected` |
/// | OAuth 5xx | `Transient` | `PasServerError` |
/// | OAuth missing status | `Transient` | `Unknown` |
/// | HTTP transport error (timeout, connect, TLS) | `Transient` | `Transport` |
/// | Anything else | `Transient` | `Unknown` |
///
/// # Why `status: None` → `Transient`
///
/// The only path that produces `Error::OAuth { status: None, .. }` in
/// the SDK is a **successful HTTP response whose body failed to parse**
/// (see `oauth.rs::send_and_deserialize`). That is fundamentally a
/// transport/proxy issue — a CDN injecting HTML, a misconfigured
/// Content-Type, a partial gzip response. Treating it as `Revoked`
/// would force-logout every user during such an outage, directly
/// violating the S-L3 invariant. Classifying as `Transient` preserves
/// the cached session until PAS becomes reachable again.
pub fn classify_refresh_error(err: &Error) -> LivenessFailure {
    match err {
        Error::OAuth {
            status: Some(code), ..
        } if (500..600).contains(code) => LivenessFailure::Transient {
            retry_after: Some(DEFAULT_TRANSIENT_RETRY_AFTER),
            cause: TransientCause::PasServerError,
        },
        Error::OAuth {
            status: Some(_), ..
        } => LivenessFailure::Revoked {
            cause: RevokeCause::PasRejected,
        },
        Error::OAuth { status: None, .. } => LivenessFailure::Transient {
            retry_after: Some(DEFAULT_TRANSIENT_RETRY_AFTER),
            cause: TransientCause::Unknown,
        },
        #[cfg(feature = "oauth")]
        Error::Http(_) => LivenessFailure::Transient {
            retry_after: Some(DEFAULT_TRANSIENT_RETRY_AFTER),
            cause: TransientCause::Transport,
        },
        _ => LivenessFailure::Transient {
            retry_after: Some(DEFAULT_TRANSIENT_RETRY_AFTER),
            cause: TransientCause::Unknown,
        },
    }
}

/// Perform a single liveness check of a stored session against PAS.
///
/// Steps, each of which may end the attempt early:
///
/// 1. Decrypt `ciphertext` with `cipher`. If that fails the stored
///    value cannot be recovered, so the result is
///    `LivenessOutcome::Failed(Revoked { cause: CipherFailure })` and
///    PAS is never contacted.
/// 2. Send the decrypted token to [`AuthClient::refresh_token`]. An
///    error is passed through [`classify_refresh_error`] and returned
///    wrapped in `LivenessOutcome::Failed`.
/// 3. If the response carries no refresh token, the result is
///    `Fresh { rotated_ciphertext: None }`.
/// 4. Otherwise the returned token is encrypted and handed back as
///    `Fresh { rotated_ciphertext: Some(_) }`; if that encryption
///    fails, the result is a `Transient` failure with cause
///    `CipherEncryptFailed`.
///
/// Callers should record `last_verified_at = now` — and store the new
/// ciphertext when one is returned — only for
/// [`LivenessOutcome::Fresh`]. This function performs no persistence
/// itself; beyond the PAS call and the cipher operations, storage is
/// the consumer's responsibility.
pub async fn attempt_liveness_refresh(
    cipher: &TokenCipher,
    client: &AuthClient,
    ciphertext: &str,
) -> LivenessOutcome {
    // Step 1: recover the stored refresh token, or give up on the session.
    let stored_token = match cipher.decrypt(ciphertext) {
        Ok(token) => token,
        Err(_) => {
            let unrecoverable = LivenessFailure::Revoked {
                cause: RevokeCause::CipherFailure,
            };
            return LivenessOutcome::Failed(unrecoverable);
        }
    };

    // Step 2: ask PAS; any error is classified and ends the attempt.
    let returned_token = match client.refresh_token(&stored_token).await {
        Ok(TokenResponse { refresh_token, .. }) => refresh_token,
        Err(refresh_err) => {
            return LivenessOutcome::Failed(classify_refresh_error(&refresh_err));
        }
    };

    // Steps 3–4: re-encrypt only when PAS handed back a refresh token.
    match returned_token
        .as_deref()
        .map(|token| cipher.encrypt(token))
    {
        None => LivenessOutcome::Fresh {
            rotated_ciphertext: None,
        },
        Some(Ok(reencrypted)) => LivenessOutcome::Fresh {
            rotated_ciphertext: Some(reencrypted),
        },
        // Liveness was confirmed by PAS, but the new token could not be
        // encrypted locally. Report it as transient so the consumer
        // keeps serving the session from the ciphertext it already has
        // and a later attempt can retry.
        Some(Err(_)) => LivenessOutcome::Failed(LivenessFailure::Transient {
            retry_after: Some(DEFAULT_TRANSIENT_RETRY_AFTER),
            cause: TransientCause::CipherEncryptFailed,
        }),
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    //! Guards the S-L3 invariant (a transient failure must never force
    //! a logout) and the S-L1 consequence (a cipher failure ends the
    //! session). The classifier is checked status by status; one
    //! end-to-end test covers the decrypt-failure short-circuit in
    //! `attempt_liveness_refresh`.

    use super::*;

    /// Classify an `Error::OAuth` for a token refresh carrying the
    /// given HTTP status (or none).
    fn classify_status(http_status: Option<u16>) -> LivenessFailure {
        let err = Error::OAuth {
            operation: "token refresh",
            status: http_status,
            detail: "test".into(),
        };
        classify_refresh_error(&err)
    }

    /// The revoke cause, if the failure is `Revoked`.
    fn revoke_cause(failure: &LivenessFailure) -> Option<RevokeCause> {
        match failure {
            LivenessFailure::Revoked { cause } => Some(*cause),
            LivenessFailure::Transient { .. } => None,
        }
    }

    /// The transient cause, if the failure is `Transient`.
    fn transient_cause(failure: &LivenessFailure) -> Option<TransientCause> {
        match failure {
            LivenessFailure::Transient { cause, .. } => Some(*cause),
            LivenessFailure::Revoked { .. } => None,
        }
    }

    #[test]
    fn oauth_400_is_revoked_pas_rejected() {
        let failure = classify_status(Some(400));
        assert_eq!(revoke_cause(&failure), Some(RevokeCause::PasRejected));
    }

    #[test]
    fn oauth_401_is_revoked_pas_rejected() {
        let failure = classify_status(Some(401));
        assert_eq!(revoke_cause(&failure), Some(RevokeCause::PasRejected));
    }

    #[test]
    fn oauth_403_is_revoked_pas_rejected() {
        let failure = classify_status(Some(403));
        assert_eq!(revoke_cause(&failure), Some(RevokeCause::PasRejected));
    }

    #[test]
    fn oauth_500_is_transient_pas_server_error() {
        let failure = classify_status(Some(500));
        assert_eq!(
            transient_cause(&failure),
            Some(TransientCause::PasServerError)
        );
    }

    #[test]
    fn oauth_503_is_transient_pas_server_error() {
        let failure = classify_status(Some(503));
        assert_eq!(
            transient_cause(&failure),
            Some(TransientCause::PasServerError)
        );
    }

    #[test]
    fn oauth_missing_status_is_transient_unknown() {
        // S-L3: a status-less OAuth error ONLY comes from a 2xx response
        // whose body could not be parsed (see
        // `oauth.rs::send_and_deserialize`). That points at a proxy/CDN
        // problem rather than PAS rejecting the token, so logging the
        // user out here would break the S-L3 invariant.
        let failure = classify_status(None);
        assert_eq!(transient_cause(&failure), Some(TransientCause::Unknown));
    }

    #[test]
    fn non_oauth_error_is_transient_unknown() {
        use crate::error::TokenError;

        let failure = classify_refresh_error(&Error::Token(TokenError::Expired));
        assert_eq!(transient_cause(&failure), Some(TransientCause::Unknown));
    }

    #[tokio::test]
    async fn decrypt_failure_short_circuits_to_revoked_cipher_failure() {
        // S-L1 consequence: ciphertext that cannot be decrypted is
        // unrecoverable, and the PAS call must never be reached on this
        // path.
        use crate::oauth::{AuthClient, OAuthConfig};
        use base64::{Engine, engine::general_purpose::STANDARD};

        let cipher = TokenCipher::from_base64_key(&STANDARD.encode([0u8; 32])).unwrap();

        // Well-formed base64 that clears the nonce-length check yet is
        // not a valid AEAD ciphertext under this key.
        let garbage_ct = STANDARD.encode([0u8; 64]);

        // Building an AuthClient has no side effects; it is never
        // invoked on this path.
        let client = AuthClient::try_new(OAuthConfig::new(
            "test-client",
            "https://example.invalid".parse().unwrap(),
        ))
        .unwrap();

        let outcome = attempt_liveness_refresh(&cipher, &client, &garbage_ct).await;
        match outcome {
            LivenessOutcome::Failed(failure) => {
                assert_eq!(revoke_cause(&failure), Some(RevokeCause::CipherFailure));
            }
            LivenessOutcome::Fresh { .. } => {
                panic!("decrypt failure must not produce a Fresh outcome");
            }
        }
    }
}