cellos-sink-jetstream 0.5.1

//! JetStream [`cellos_core::ports::EventSink`] — publishes JSON payloads to a configured subject.
//!
//! ## Reconnect / circuit-breaker model (P3-01)
//!
//! `JetStreamEventSink` keeps a small in-memory state machine in front of the
//! underlying NATS connection so an extended broker outage cannot stall the
//! supervisor's `emit()` path:
//!
//! - **`Connected`** — last publish succeeded. Next `emit()` calls
//!   [`Publisher::publish`] directly (with a small bounded retry, see
//!   [`with_retry`]).
//! - **`Reconnecting { attempt, next_after }`** — last publish failed.
//!   `emit()` returns `Err` immediately while `Instant::now() < next_after`
//!   (the circuit is "open"). Once `next_after` has elapsed the sink takes
//!   one bounded probe attempt; on success the state resets to `Connected`,
//!   on failure `attempt` is incremented and `next_after` is pushed forward
//!   by an exponentially-backed-off interval.
//!
//! The async-nats `Client` performs its own TCP-level reconnect under the
//! hood — this state machine is layered on top so that the supervisor's
//! `emit()` returns Err quickly during an outage instead of being held by
//! the underlying client's blocking publish path. Operators decide what to
//! do with that Err (typically log and continue).
//!
//! Backoff schedule: `100ms * 2^attempt`, clamped to a maximum of 3 minutes
//! (`180s`) once the doubled value would exceed it. So the sequence is
//! `100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s, …, 102.4s, 180s, 180s, …`.
//! Backoff resets to `attempt = 0` on the first successful publish after an
//! outage (acceptance: "Backoff resets on successful publish").

use std::fmt::Display;
use std::future::Future;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use bytes::Bytes;
use tokio::sync::Mutex;
use tokio::time::Instant;
use tracing::{debug, instrument, warn};

use async_nats::jetstream;

use cellos_core::ports::EventSink;
use cellos_core::{redact_url_if_echoed_in_text, CellosError, CloudEventV1};

/// Maximum number of publish attempts made inside a single retry loop
/// before the last error is handed back to the caller.
const PUBLISH_MAX_ATTEMPTS: u32 = 3;
/// Base backoff between publish attempts; multiplied by 4 after every
/// further failed attempt (`base * 4^(attempt - 1)`).
/// With base = 100ms and three attempts the waits are 100ms, then 400ms.
const PUBLISH_BACKOFF_BASE: Duration = Duration::from_millis(100);

/// Starting interval of the reconnect-state backoff schedule (P3-01).
///
/// This is the wait used for reconnect attempt 0 (100ms). Every further
/// attempt doubles it until [`RECONNECT_BACKOFF_CAP`] is reached.
const RECONNECT_BACKOFF_BASE: Duration = Duration::from_millis(100);

/// Upper bound of the reconnect-state backoff: the wait before the next
/// probe is never longer than 3 minutes (P3-01 acceptance).
const RECONNECT_BACKOFF_CAP: Duration = Duration::from_secs(180);

/// Backoff to apply for the 0-indexed reconnect attempt `attempt`.
///
/// Returns `RECONNECT_BACKOFF_BASE * 2^attempt`, limited to
/// [`RECONNECT_BACKOFF_CAP`] (180s):
///
/// - `attempt = 0` → 100ms, `attempt = 1` → 200ms, `attempt = 2` → 400ms, …
/// - any attempt whose doubled value would reach or exceed the cap → 180s.
///
/// Arbitrarily large attempt counts (up to `u32::MAX`) are safe: neither
/// the shift nor the multiplication can overflow or panic.
pub(crate) fn reconnect_backoff(attempt: u32) -> Duration {
    // 2^attempt as a `u32` multiplier. A shift of 32 bits or more cannot be
    // represented, so it is pinned to `u32::MAX`, which is far past the cap.
    let multiplier = 1u32.checked_shl(attempt).unwrap_or(u32::MAX);
    match RECONNECT_BACKOFF_BASE.checked_mul(multiplier) {
        Some(delay) if delay < RECONNECT_BACKOFF_CAP => delay,
        // Either the product overflowed `Duration` or it reached the cap.
        _ => RECONNECT_BACKOFF_CAP,
    }
}

/// Subject template placeholder for the per-tenant isolation dimension (A2-03).
///
/// When the configured subject template contains this token, [`resolve_tenant_subject`]
/// substitutes the event's `data.tenantId` field at publish time. Templates without
/// the placeholder are forwarded verbatim — single-tenant deployments produce
/// byte-identical wire output to pre-A2-03 builds.
pub const TENANT_ID_PLACEHOLDER: &str = "{tenantId}";

/// Sentinel value substituted for `{tenantId}` when the event carries no usable tenant.
///
/// The sentinel is itself a valid single NATS subject token (no dots, no
/// wildcards). It is used when `data.tenantId` is missing, is not a string,
/// or is not a valid single subject token (see [`resolve_tenant_subject`]).
/// Operators running multi-tenant streams who want a hard guarantee that such
/// events never land on a tenant subject can configure their JetStream stream
/// filter to exclude the sentinel subject (e.g. `cellos.events.single.*`).
pub const TENANT_ID_DEFAULT_TOKEN: &str = "single";

/// Returns `true` when `token` can be spliced into a subject as exactly one token.
///
/// Rejected: the empty string (it would create an empty token such as
/// `a..b`), the token separator `.`, the wildcards `*` and `>`, and any
/// whitespace or control character.
fn is_single_subject_token(token: &str) -> bool {
    !token.is_empty()
        && !token
            .chars()
            .any(|c| matches!(c, '.' | '*' | '>') || c.is_whitespace() || c.is_control())
}

/// Resolve a JetStream subject for `event` against `template`.
///
/// - Templates without [`TENANT_ID_PLACEHOLDER`] are returned unchanged
///   (single-tenant noop, byte-identical to pre-A2-03 behaviour).
/// - When the placeholder is present, every occurrence is replaced with
///   `event.data.tenantId` if that field is a string forming a valid single
///   subject token, or with [`TENANT_ID_DEFAULT_TOKEN`] otherwise.
///
/// A tenant id is rejected (and the sentinel used instead) when it is empty
/// or contains `.`, `*`, `>`, whitespace or control characters. Substituting
/// such a value verbatim would either yield a malformed subject or let the
/// id spill out of its single-token slot into another part of the subject
/// hierarchy, defeating the per-tenant isolation the placeholder exists for.
///
/// The tenant id read here comes from `data.tenantId`, which the cellos-core
/// event constructors mirror from `spec.correlation.tenantId` (A2-03). See
/// `cellos_core::events::lifecycle_started_data_v1`.
pub fn resolve_tenant_subject(template: &str, event: &CloudEventV1) -> String {
    if !template.contains(TENANT_ID_PLACEHOLDER) {
        return template.to_string();
    }
    let tenant = event
        .data
        .as_ref()
        .and_then(|d| d.get("tenantId"))
        .and_then(|v| v.as_str())
        // Never let an unusable id shape the subject; fall back to the sentinel.
        .filter(|candidate| is_single_subject_token(candidate))
        .unwrap_or(TENANT_ID_DEFAULT_TOKEN);
    template.replace(TENANT_ID_PLACEHOLDER, tenant)
}

/// Drive `f` until it succeeds or `max_attempts` attempts have failed.
///
/// `f` is invoked once per attempt and must build a brand-new future each
/// time (a closure returning a future, not one future polled repeatedly).
/// It is always invoked at least once, even when `max_attempts` is `0`.
///
/// After failed attempt N (1-indexed) that is not the last one, the function
/// logs a `tracing::warn!` carrying the attempt number and the error, then
/// sleeps for `PUBLISH_BACKOFF_BASE * 4^(N-1)` — 100ms, then 400ms, with the
/// 100ms base — before trying again.
///
/// # Errors
///
/// Returns the error of the final attempt unchanged. No warning is logged
/// for that terminal failure; reporting it is left to the caller.
pub async fn with_retry<F, Fut, T, E>(max_attempts: u32, mut f: F) -> Result<T, E>
where
    F: FnMut() -> Fut,
    Fut: Future<Output = Result<T, E>>,
    E: Display,
{
    let mut attempt: u32 = 1;
    loop {
        let failure = match f().await {
            Ok(value) => return Ok(value),
            Err(failure) => failure,
        };
        // Out of attempts: surface the most recent error as-is.
        if attempt >= max_attempts {
            return Err(failure);
        }
        // Wait grows by a factor of 4 per failed attempt: 100ms, 400ms, 1.6s, ...
        let multiplier = 4u32.saturating_pow(attempt - 1);
        let backoff = PUBLISH_BACKOFF_BASE.saturating_mul(multiplier);
        warn!(
            attempt,
            max_attempts,
            backoff_ms = backoff.as_millis() as u64,
            error = %failure,
            "publish attempt failed; retrying after backoff"
        );
        tokio::time::sleep(backoff).await;
        attempt += 1;
    }
}

/// Reconnect state for [`JetStreamEventSink`].
///
/// `Connected` is the steady state and the state every sink starts in. The
/// sink moves to `Reconnecting { .. }` after an `emit()` whose publish
/// (including its bounded retries) failed, and stays there until a later
/// publish succeeds.
///
/// Public + `#[doc(hidden)]` so `tests/reconnect.rs` can pattern-match on
/// the snapshot returned by [`JetStreamEventSink::debug_state`]; not part
/// of the stable surface area.
#[doc(hidden)]
#[derive(Debug, Clone)]
pub enum ReconnectState {
    /// Last observed publish succeeded; emit takes the fast path.
    Connected,
    /// Last observed publish failed. `attempt` is the 0-indexed reconnect
    /// attempt count (incremented on each consecutive failure). `next_after`
    /// is the earliest [`Instant`] at which the sink will take its next
    /// probe attempt; before that, `emit()` returns Err immediately.
    Reconnecting {
        /// Number of consecutive failed reconnect probes (0 means "we just
        /// transitioned to Reconnecting and the next probe is allowed once
        /// `next_after` elapses"). Also the input to `reconnect_backoff`
        /// that produced `next_after`.
        attempt: u32,
        /// Earliest instant at which the next probe attempt is allowed
        /// (time of the failure plus the backoff for `attempt`).
        next_after: Instant,
    },
}

/// Trait abstraction over JetStream publish, so the reconnect state machine
/// can be exercised by integration tests with a mock that simulates broker
/// outages.
///
/// This is `pub` (with `#[doc(hidden)]`) only so `tests/reconnect.rs` can
/// drive the state machine without a real NATS server; production wiring
/// uses the inherent `connect*` constructors and never sees this trait.
///
/// Implementations must be `Send + Sync`: the sink stores the publisher as
/// an `Arc<dyn Publisher>` and clones that handle for every publish attempt.
#[doc(hidden)]
#[async_trait]
pub trait Publisher: Send + Sync {
    /// Attempt to publish `payload` to `subject`.
    ///
    /// # Errors
    ///
    /// Returns a [`CellosError`] when the publish did not go through. The
    /// sink counts that as one failed attempt of its bounded retry loop.
    async fn publish(&self, subject: String, payload: Bytes) -> Result<(), CellosError>;
}

/// Production [`Publisher`]: forwards every publish to an
/// `async_nats::jetstream::Context`.
struct JetStreamPublisher {
    /// JetStream context that all publishes are sent through.
    context: jetstream::Context,
}

#[async_trait]
impl Publisher for JetStreamPublisher {
    /// Publish `payload` on `subject` through the wrapped JetStream context.
    ///
    /// # Errors
    ///
    /// A failure reported by the context is wrapped in
    /// [`CellosError::EventSink`] with a `jetstream publish:` prefix.
    //
    // NOTE(review): whatever `Context::publish` resolves to on success is
    // discarded here. In recent async-nats releases that value is presumably
    // a `PublishAckFuture`, in which case the server acknowledgement is never
    // awaited and an unacknowledged publish still counts as success — confirm
    // against the pinned async-nats version that this is intended.
    async fn publish(&self, subject: String, payload: Bytes) -> Result<(), CellosError> {
        match self.context.publish(subject, payload).await {
            Ok(_) => Ok(()),
            Err(e) => Err(CellosError::EventSink(format!("jetstream publish: {e}"))),
        }
    }
}

/// Publishes serialized [`CloudEventV1`] to JetStream.
///
/// See module docs for the reconnect / circuit-breaker semantics.
pub struct JetStreamEventSink {
    /// Publish backend: the real JetStream publisher in production, or a
    /// mock injected through `from_publisher` in tests.
    publisher: Arc<dyn Publisher>,
    /// Configured subject or subject template; resolved per event with
    /// `resolve_tenant_subject` before publishing.
    subject: String,
    /// Reconnect / circuit-breaker state, guarded by an async mutex.
    state: Arc<Mutex<ReconnectState>>,
}

impl JetStreamEventSink {
    /// Connect to NATS at `nats_url` and build a sink publishing to `subject`.
    ///
    /// Shorthand for [`Self::connect_with_root_ca`] without an extra root
    /// certificate. Stream must exist or server auto-creates per NATS config.
    ///
    /// # Errors
    ///
    /// Returns [`CellosError::EventSink`] when the connection cannot be established.
    pub async fn connect(nats_url: &str, subject: impl Into<String>) -> Result<Self, CellosError> {
        Self::connect_with_root_ca(nats_url, subject, None).await
    }

    /// Like [`Self::connect`], but additionally trusts the PEM root
    /// certificate(s) in `root_ca_pem_file` when one is given (e.g. a dev CA
    /// via `NATS_CA_FILE` for `tls://` URLs).
    ///
    /// The returned sink starts in the `Connected` state.
    ///
    /// # Errors
    ///
    /// Returns [`CellosError::EventSink`] with a `nats connect:` prefix when
    /// connecting fails. The error text is passed through
    /// `redact_url_if_echoed_in_text` together with `nats_url` before it is
    /// embedded in the message.
    pub async fn connect_with_root_ca(
        nats_url: &str,
        subject: impl Into<String>,
        root_ca_pem_file: Option<&Path>,
    ) -> Result<Self, CellosError> {
        let options = match root_ca_pem_file {
            Some(ca_path) => {
                async_nats::ConnectOptions::new().add_root_certificates(ca_path.to_path_buf())
            }
            None => async_nats::ConnectOptions::new(),
        };
        let client = match options.connect(nats_url).await {
            Ok(client) => client,
            Err(e) => {
                let msg = redact_url_if_echoed_in_text(&e.to_string(), nats_url);
                return Err(CellosError::EventSink(format!("nats connect: {msg}")));
            }
        };
        let publisher = JetStreamPublisher {
            context: jetstream::new(client),
        };
        Ok(Self::from_publisher(Arc::new(publisher), subject))
    }

    /// Test-only constructor: wrap an arbitrary [`Publisher`] in a sink that
    /// starts in the `Connected` state (used by `tests/reconnect.rs` to
    /// simulate broker outages without running a real NATS server). Hidden
    /// from rustdoc and not intended for production callers — production
    /// wiring uses [`Self::connect`].
    #[doc(hidden)]
    pub fn from_publisher(publisher: Arc<dyn Publisher>, subject: impl Into<String>) -> Self {
        let subject = subject.into();
        let state = Arc::new(Mutex::new(ReconnectState::Connected));
        Self {
            publisher,
            subject,
            state,
        }
    }

    /// Return a copy of the current reconnect state. Hidden from rustdoc;
    /// exists so the reconnect integration test can verify state transitions.
    #[doc(hidden)]
    pub async fn debug_state(&self) -> ReconnectState {
        let guard = self.state.lock().await;
        ReconnectState::clone(&guard)
    }
}

#[async_trait]
impl EventSink for JetStreamEventSink {
    /// Serialize `event` as JSON and publish it, honouring the circuit breaker.
    ///
    /// Steps, in order:
    /// 1. Serialize the event; a serialization error is returned right away.
    /// 2. If the sink is `Reconnecting` and the next probe is not due yet,
    ///    return `Err` immediately without touching the publisher.
    /// 3. Resolve the subject for this event and publish with a bounded retry.
    /// 4. On success set the state to `Connected`; on failure move to (or
    ///    stay in) `Reconnecting` with an incremented attempt counter and a
    ///    later `next_after`, then return the publish error.
    //
    // NOTE(review): the state lock is not held across the publish, so several
    // concurrent `emit()` calls can each probe once `next_after` has elapsed,
    // and each failing call bumps `attempt` — confirm this is acceptable.
    #[instrument(skip(self, event), fields(ce_id = %event.id, ce_type = %event.ty))]
    async fn emit(&self, event: &CloudEventV1) -> Result<(), CellosError> {
        let payload = match serde_json::to_vec(event) {
            Ok(encoded) => Bytes::from(encoded),
            Err(e) => {
                return Err(CellosError::EventSink(format!("serialize CloudEvent: {e}")));
            }
        };

        // Circuit breaker: while an outage is in progress and the next probe
        // is not yet due, fail fast so the supervisor's emit() path never
        // blocks on the broker. The lock is released before any publish.
        let open_circuit = {
            let guard = self.state.lock().await;
            match *guard {
                ReconnectState::Reconnecting {
                    attempt,
                    next_after,
                } => {
                    let now = Instant::now();
                    (now < next_after)
                        .then(|| (attempt, next_after.saturating_duration_since(now)))
                }
                ReconnectState::Connected => None,
            }
        };
        if let Some((attempt, remaining)) = open_circuit {
            let wait_ms = remaining.as_millis() as u64;
            return Err(CellosError::EventSink(format!(
                "jetstream sink in reconnecting state (attempt={attempt}, next probe in {wait_ms}ms)"
            )));
        }

        // Per-event `{tenantId}` substitution (A2-03). A subject without the
        // placeholder comes back as a plain copy, so single-tenant
        // deployments keep publishing to exactly what they configured.
        let subject = resolve_tenant_subject(&self.subject, event);

        // Steady-state publish or outage probe. Short hiccups are absorbed
        // by `with_retry` within this call; only a failure that survives the
        // retries feeds the reconnect state machine below.
        let outcome = with_retry(PUBLISH_MAX_ATTEMPTS, || {
            let publisher = Arc::clone(&self.publisher);
            let (subject, payload) = (subject.clone(), payload.clone());
            async move { publisher.publish(subject, payload).await }
        })
        .await;

        let failure = match outcome {
            Ok(()) => {
                // Success: back to Connected, which also resets the backoff
                // (explicit P3-01 acceptance).
                let mut guard = self.state.lock().await;
                let previous = std::mem::replace(&mut *guard, ReconnectState::Connected);
                if matches!(previous, ReconnectState::Reconnecting { .. }) {
                    debug!("jetstream sink: publish recovered; resetting backoff to Connected");
                }
                return Ok(());
            }
            Err(e) => e,
        };

        // Failure: a first failure starts at attempt 0, each consecutive one
        // increments the counter; then schedule the next allowed probe.
        let mut guard = self.state.lock().await;
        let next_attempt = if let ReconnectState::Reconnecting { attempt, .. } = *guard {
            attempt.saturating_add(1)
        } else {
            0
        };
        let backoff = reconnect_backoff(next_attempt);
        let next_after = Instant::now() + backoff;
        warn!(
            attempt = next_attempt,
            backoff_ms = backoff.as_millis() as u64,
            error = %failure,
            "jetstream sink: publish failed; entering reconnecting state"
        );
        *guard = ReconnectState::Reconnecting {
            attempt: next_attempt,
            next_after,
        };
        Err(failure)
    }
}

/// Unit tests for the retry helper, the tenant subject resolution and the
/// reconnect backoff schedule.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU32, Ordering};
    use std::sync::Arc;

    /// Build a single-threaded runtime whose clock starts paused.
    ///
    /// Pause tokio's clock so backoff sleeps don't burn real wall time in
    /// tests; auto-advance any pending sleeps as soon as nothing else is
    /// runnable on the runtime.
    fn rt() -> tokio::runtime::Runtime {
        tokio::runtime::Builder::new_current_thread()
            .enable_time()
            .start_paused(true)
            .build()
            .unwrap()
    }

    /// A closure that succeeds immediately is called exactly once.
    #[test]
    fn with_retry_succeeds_on_first_try() {
        let calls = Arc::new(AtomicU32::new(0));
        let result: Result<u32, &'static str> = rt().block_on(async {
            let calls = calls.clone();
            with_retry(3, || {
                let calls = calls.clone();
                async move {
                    calls.fetch_add(1, Ordering::SeqCst);
                    Ok::<u32, &'static str>(42)
                }
            })
            .await
        });
        assert_eq!(result, Ok(42));
        assert_eq!(calls.load(Ordering::SeqCst), 1);
    }

    /// Two failures followed by a success: the success is returned after
    /// three calls in total.
    #[test]
    fn with_retry_recovers_after_transient_failures() {
        let calls = Arc::new(AtomicU32::new(0));
        let calls_for_assert = calls.clone();
        let result: Result<&'static str, &'static str> = rt().block_on(async move {
            with_retry(3, || {
                let calls = calls.clone();
                async move {
                    // `n` is the 1-indexed call number.
                    let n = calls.fetch_add(1, Ordering::SeqCst) + 1;
                    if n < 3 {
                        Err("transient")
                    } else {
                        Ok("ok")
                    }
                }
            })
            .await
        });
        assert_eq!(result, Ok("ok"));
        assert_eq!(calls_for_assert.load(Ordering::SeqCst), 3);
    }

    /// When every attempt fails, the error of the last attempt is returned
    /// and the closure has been called `max_attempts` times.
    #[test]
    fn with_retry_returns_last_error_after_exhaustion() {
        let calls = Arc::new(AtomicU32::new(0));
        let calls_for_assert = calls.clone();
        let result: Result<(), String> = rt().block_on(async move {
            with_retry(3, || {
                let calls = calls.clone();
                async move {
                    let n = calls.fetch_add(1, Ordering::SeqCst) + 1;
                    Err::<(), String>(format!("fail-{n}"))
                }
            })
            .await
        });
        assert_eq!(result, Err("fail-3".into()));
        assert_eq!(calls_for_assert.load(Ordering::SeqCst), 3);
    }

    /// Build a minimal lifecycle-started CloudEvent carrying `data`.
    fn ce(data: Option<serde_json::Value>) -> CloudEventV1 {
        CloudEventV1 {
            specversion: "1.0".into(),
            id: "ce-1".into(),
            source: "test".into(),
            ty: "dev.cellos.events.cell.lifecycle.v1.started".into(),
            datacontenttype: Some("application/json".into()),
            data,
            time: None,
            traceparent: None,
        }
    }

    /// A template without `{tenantId}` is returned unchanged even when the
    /// event carries a tenant.
    #[test]
    fn resolve_tenant_subject_template_without_placeholder_is_passthrough() {
        let event = ce(Some(serde_json::json!({"tenantId": "acme"})));
        assert_eq!(
            resolve_tenant_subject("cellos.events.v1", &event),
            "cellos.events.v1"
        );
    }

    /// The placeholder is replaced with the event's `data.tenantId`.
    #[test]
    fn resolve_tenant_subject_substitutes_when_tenant_present() {
        let event = ce(Some(serde_json::json!({"tenantId": "acme"})));
        assert_eq!(
            resolve_tenant_subject("cellos.events.{tenantId}.v1", &event),
            "cellos.events.acme.v1"
        );
    }

    /// Data without a `tenantId` field resolves to the sentinel token.
    #[test]
    fn resolve_tenant_subject_uses_sentinel_when_tenant_absent() {
        let event = ce(Some(serde_json::json!({"cellId": "c1"})));
        assert_eq!(
            resolve_tenant_subject("cellos.events.{tenantId}.v1", &event),
            "cellos.events.single.v1"
        );
    }

    /// An event without any `data` resolves to the sentinel token.
    #[test]
    fn resolve_tenant_subject_uses_sentinel_when_data_missing() {
        let event = ce(None);
        assert_eq!(
            resolve_tenant_subject("cellos.events.{tenantId}.v1", &event),
            "cellos.events.single.v1"
        );
    }

    /// With `max_attempts = 1` a failure is returned after a single call.
    #[test]
    fn with_retry_single_attempt_does_not_retry() {
        let calls = Arc::new(AtomicU32::new(0));
        let calls_for_assert = calls.clone();
        let result: Result<(), &'static str> = rt().block_on(async move {
            with_retry(1, || {
                let calls = calls.clone();
                async move {
                    calls.fetch_add(1, Ordering::SeqCst);
                    Err::<(), &'static str>("nope")
                }
            })
            .await
        });
        assert_eq!(result, Err("nope"));
        assert_eq!(calls_for_assert.load(Ordering::SeqCst), 1);
    }

    /// The reconnect backoff doubles per attempt and never exceeds the cap.
    #[test]
    fn reconnect_backoff_schedule_is_exponential_and_capped() {
        // 0 → 100ms, 1 → 200ms, 2 → 400ms, 3 → 800ms, 4 → 1.6s, ...
        assert_eq!(reconnect_backoff(0), Duration::from_millis(100));
        assert_eq!(reconnect_backoff(1), Duration::from_millis(200));
        assert_eq!(reconnect_backoff(2), Duration::from_millis(400));
        assert_eq!(reconnect_backoff(3), Duration::from_millis(800));
        assert_eq!(reconnect_backoff(4), Duration::from_millis(1600));
        // Cap at 3 minutes regardless of how high the attempt count climbs.
        assert_eq!(reconnect_backoff(20), RECONNECT_BACKOFF_CAP);
        assert_eq!(reconnect_backoff(u32::MAX), RECONNECT_BACKOFF_CAP);
    }
}