calimero-node 0.10.1-rc.37

Core Calimero infrastructure and tools
use std::sync::Arc;
use std::time::{Duration, Instant};

use calimero_blobstore::BlobManager as BlobStore;
use calimero_context_client::client::ContextClient;
use calimero_node_primitives::client::NodeClient;
use calimero_primitives::{blobs::BlobId, context::ContextId};
use dashmap::DashMap;
use tracing::{debug, warn};

use crate::constants;
use crate::delta_store::DeltaStore;
use crate::run::NodeMode;
use crate::specialized_node_invite_state::{
    new_pending_specialized_node_invites, PendingSpecializedNodeInvites,
};
use crate::sync::SyncManager;

/// Cached blob with access tracking for eviction
#[derive(Debug, Clone)]
pub struct CachedBlob {
    pub data: Arc<[u8]>,
    pub last_accessed: Instant,
}

impl CachedBlob {
    pub fn new(data: Arc<[u8]>) -> Self {
        Self {
            data,
            last_accessed: Instant::now(),
        }
    }

    pub fn touch(&mut self) {
        self.last_accessed = Instant::now();
    }
}

/// External service clients (injected dependencies)
#[derive(Debug, Clone)]
pub(crate) struct NodeClients {
    pub(crate) context: ContextClient,
    pub(crate) node: NodeClient,
}

/// Service managers (injected dependencies)
#[derive(Clone, Debug)]
pub(crate) struct NodeManagers {
    pub(crate) blobstore: BlobStore,
    pub(crate) sync: SyncManager,
}

/// State of a sync session for a context.
#[derive(Debug)]
pub(crate) enum SyncSessionState {
    /// Buffering deltas during snapshot sync.
    /// The sync_start_hlc is stored in the DeltaBuffer itself.
    BufferingDeltas,
}

impl SyncSessionState {
    /// Check if we should buffer incoming deltas.
    pub fn should_buffer_deltas(&self) -> bool {
        matches!(self, Self::BufferingDeltas)
    }
}

/// Active sync session for a context.
#[derive(Debug)]
pub(crate) struct SyncSession {
    /// Current state of the sync.
    pub state: SyncSessionState,
    /// Buffer for deltas received during sync.
    pub delta_buffer: calimero_node_primitives::delta_buffer::DeltaBuffer,
    /// Timestamp of last drop warning (for rate limiting).
    pub last_drop_warning: Option<Instant>,
}

/// Mutable runtime state
#[derive(Clone, Debug)]
pub(crate) struct NodeState {
    pub(crate) blob_cache: Arc<DashMap<BlobId, CachedBlob>>,
    pub(crate) delta_stores: Arc<DashMap<ContextId, DeltaStore>>,
    /// Pending specialized node invites (standard node side) - tracks context_id/inviter for incoming verifications
    pub(crate) pending_specialized_node_invites: PendingSpecializedNodeInvites,
    /// Whether to accept mock TEE attestation (from config, for testing only)
    pub(crate) accept_mock_tee: bool,
    /// Node operation mode (Standard or ReadOnly)
    pub(crate) node_mode: NodeMode,
    /// Active sync sessions (for delta buffering during snapshot sync).
    pub(crate) sync_sessions: Arc<DashMap<ContextId, SyncSession>>,
    /// Per-context queue of state deltas whose `governance_position` references
    /// governance heads that aren't yet known locally (B2 buffer-on-Unknown).
    ///
    /// Drained lazily on the next state-delta receive for the same context: each
    /// pending delta is re-evaluated via `membership_status_at`; if governance has
    /// caught up the delta is processed (applied or rejected by B3), otherwise it
    /// is pushed back. Lazy drain trades a small worst-case latency (until the
    /// next state delta arrives in the same context) for not having to plumb a
    /// notification path from the governance-apply path into this buffer.
    ///
    /// Per-context capacity is bounded by [`MAX_GOVERNANCE_PENDING_PER_CONTEXT`]
    /// with FIFO eviction of the oldest entry. Without the bound, a peer
    /// flooding deltas with unknown governance heads could exhaust memory; the
    /// hash-heartbeat divergence path will catch any legitimate eviction
    /// victim by triggering snapshot sync.
    pub(crate) governance_pending: Arc<
        DashMap<
            ContextId,
            std::collections::VecDeque<calimero_node_primitives::delta_buffer::BufferedDelta>,
        >,
    >,
}

/// Maximum number of state deltas that may sit in the governance-pending
/// buffer for a single context simultaneously. Exceeding this evicts the
/// oldest entry FIFO. Sized for normal partition-recovery — a few seconds
/// of held deltas at typical send rates — not for adversarial flooding.
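/// As a rough illustration: at an assumed steady rate of ~50 deltas/s per
/// context, 256 entries cover about five seconds of backlog before eviction
/// kicks in.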
pub(crate) const MAX_GOVERNANCE_PENDING_PER_CONTEXT: usize = 256;

impl NodeState {
    pub(crate) fn blob_cache_handle(&self) -> Arc<DashMap<BlobId, CachedBlob>> {
        self.blob_cache.clone()
    }

    pub(crate) fn delta_stores_handle(&self) -> Arc<DashMap<ContextId, DeltaStore>> {
        self.delta_stores.clone()
    }

    pub(crate) fn pending_specialized_node_invites_handle(&self) -> PendingSpecializedNodeInvites {
        self.pending_specialized_node_invites.clone()
    }

    pub(crate) const fn accept_mock_tee(&self) -> bool {
        self.accept_mock_tee
    }

    pub(crate) const fn node_mode(&self) -> NodeMode {
        self.node_mode
    }

    pub(crate) fn new(accept_mock_tee: bool, node_mode: NodeMode) -> Self {
        Self {
            blob_cache: Arc::new(DashMap::new()),
            delta_stores: Arc::new(DashMap::new()),
            pending_specialized_node_invites: new_pending_specialized_node_invites(),
            accept_mock_tee,
            node_mode,
            sync_sessions: Arc::new(DashMap::new()),
            governance_pending: Arc::new(DashMap::new()),
        }
    }

    /// Push a state delta into the governance-pending buffer. Used by B2 when
    /// `membership_status_at` returns `Unknown { needed }` — the referenced
    /// governance heads aren't yet known locally, so the delta cannot be
    /// authorized until governance catches up.
    ///
    /// Skips the push if a delta with the same `id` is already in the
    /// queue (gossipsub re-delivers are common; double-buffering would
    /// re-apply the same delta twice when the drain fires).
    ///
    /// FIFO-evicts the oldest entry if pushing would exceed
    /// [`MAX_GOVERNANCE_PENDING_PER_CONTEXT`]. Eviction emits a warn log so
    /// operators can spot DoS-shaped traffic.
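    ///
    /// An illustrative sketch of the B2 call site (the `MembershipStatus`
    /// match arms and the `apply_or_reject` helper below are placeholders,
    /// not the actual receive-path code):
    ///
    /// ```ignore
    /// match membership_status_at(&delta.governance_position) {
    ///     // Referenced governance heads unknown locally: park the delta for
    ///     // a later drain pass (see `pop_governance_pending`).
    ///     MembershipStatus::Unknown { .. } => {
    ///         state.buffer_governance_pending(context_id, delta);
    ///     }
    ///     // Known status: apply or reject (B3) right away.
    ///     status => apply_or_reject(status, delta),
    /// }
    /// ```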
    pub(crate) fn buffer_governance_pending(
        &self,
        context_id: ContextId,
        delta: calimero_node_primitives::delta_buffer::BufferedDelta,
    ) {
        let mut entry = self.governance_pending.entry(context_id).or_default();
        // Deduplicate by delta_id — gossipsub re-delivery shouldn't
        // amplify the buffer or cause repeated re-apply work on drain.
        if entry.iter().any(|existing| existing.id == delta.id) {
            debug!(
                %context_id,
                delta_id = ?delta.id,
                "governance-pending buffer: skipping duplicate"
            );
            return;
        }
        if entry.len() >= MAX_GOVERNANCE_PENDING_PER_CONTEXT {
            let evicted = entry.pop_front();
            warn!(
                %context_id,
                evicted_id = ?evicted.as_ref().map(|d| d.id),
                cap = MAX_GOVERNANCE_PENDING_PER_CONTEXT,
                "governance-pending buffer at capacity; evicting oldest"
            );
        }
        entry.push_back(delta);
    }

    /// Pop the front-most pending delta for `context_id`, leaving the rest
    /// of the queue in place. Returns `None` if the buffer is empty.
    ///
    /// Used by the drain loop in
    /// `state_delta::drain_governance_pending` instead of a bulk
    /// drain-all-then-process: if `apply_authorized_state_delta` panics
    /// or the actor task is killed mid-iteration, only the in-flight
    /// delta is lost — the rest stay in the buffer and get re-tried by
    /// the next drain pass. The bulk-drain version (commit history) was
    /// flagged by review for losing every still-unprocessed delta on
    /// panic.
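    ///
    /// A minimal sketch of the pop-one-at-a-time drain pattern this enables
    /// (`process_delta` stands in for the real apply/reject step):
    ///
    /// ```ignore
    /// // Cap iterations at the queue length observed up front, so a delta
    /// // that re-buffers itself cannot keep the loop spinning.
    /// let mut budget = state.governance_pending_len(&context_id);
    /// while budget > 0 {
    ///     let Some(delta) = state.pop_governance_pending(&context_id) else { break };
    ///     process_delta(delta); // a panic here loses only this one delta
    ///     budget -= 1;
    /// }
    /// ```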
    pub(crate) fn pop_governance_pending(
        &self,
        context_id: &ContextId,
    ) -> Option<calimero_node_primitives::delta_buffer::BufferedDelta> {
        let mut entry = self.governance_pending.get_mut(context_id)?;
        let popped = entry.pop_front();
        // Don't remove the now-empty VecDeque here. A previous version of
        // this code did `drop(entry); remove(context_id)`, which had a
        // race: a concurrent `buffer_governance_pending` could insert a
        // fresh delta between the lock-drop and the remove, and the
        // remove would silently lose that newly-inserted delta. Leaving
        // an empty VecDeque costs ~24 bytes per context that ever had
        // pending entries, which is bounded and trivial. If empty-entry
        // accumulation ever matters, a periodic GC pass that holds the
        // entry-write-lock and `remove_if(|q| q.is_empty())` is the
        // race-free way to clean up.
        popped
    }

    /// Returns the current length of the governance-pending buffer for a
    /// context. Used by the drain loop's iteration cap so we can't get
    /// stuck draining indefinitely if a delta keeps re-buffering itself
    /// (the per-delta `governance_drain_attempts` counter is the deeper
    /// guard, but this is a cheap pre-check).
    pub(crate) fn governance_pending_len(&self, context_id: &ContextId) -> usize {
        self.governance_pending
            .get(context_id)
            .map(|q| q.len())
            .unwrap_or(0)
    }

    /// List every `ContextId` that currently has at least one entry in the
    /// governance-pending buffer. Used by the namespace-governance apply
    /// path to trigger drains across all affected contexts when a
    /// governance op lands — without this, the lazy on-state-delta drain
    /// alone deadlocks if the only state delta in flight is one waiting
    /// for that very governance op.
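    ///
    /// Illustrative use from that governance-apply path (the exact call shape
    /// of `drain_governance_pending` is a placeholder):
    ///
    /// ```ignore
    /// // After a governance op lands, kick a drain for every context that has
    /// // deltas parked on previously-unknown governance heads.
    /// for context_id in state.governance_pending_context_ids() {
    ///     drain_governance_pending(&state, context_id);
    /// }
    /// ```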
    pub(crate) fn governance_pending_context_ids(&self) -> Vec<ContextId> {
        self.governance_pending
            .iter()
            .map(|entry| *entry.key())
            .collect()
    }

    /// Check if we should buffer a delta (during snapshot sync).
    pub(crate) fn should_buffer_delta(&self, context_id: &ContextId) -> bool {
        self.sync_sessions
            .get(context_id)
            .is_some_and(|session| session.state.should_buffer_deltas())
    }

    /// Buffer a delta during snapshot sync (Invariant I6).
    ///
    /// Returns `Some(PushResult)` if there was an active session, `None` if no session.
    ///
    /// The `PushResult` indicates what happened:
    /// - `Added`: Delta was buffered successfully
    /// - `Duplicate`: Delta ID was already buffered (no action)
    /// - `Evicted(id)`: Delta was buffered but oldest was evicted
    /// - `DroppedZeroCapacity(id)`: Delta was dropped (zero capacity)
    ///
    /// If the buffer is full, the oldest delta is evicted (oldest-first policy)
    /// and a rate-limited warning is logged. Drops are tracked via metrics.
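    ///
    /// Illustrative caller-side handling of the result (variants as listed
    /// above; what the caller does on `None` depends on the receive path):
    ///
    /// ```ignore
    /// match state.buffer_delta(&context_id, delta) {
    ///     // No active sync session: handle the delta on the normal path.
    ///     None => { /* fall through to regular processing */ }
    ///     // Buffered (or already buffered): nothing more to do here.
    ///     Some(PushResult::Added | PushResult::Duplicate) => {}
    ///     // Data loss was already logged (rate-limited) inside `buffer_delta`.
    ///     Some(PushResult::Evicted(_) | PushResult::DroppedZeroCapacity(_)) => {}
    /// }
    /// ```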
    pub(crate) fn buffer_delta(
        &self,
        context_id: &ContextId,
        delta: calimero_node_primitives::delta_buffer::BufferedDelta,
    ) -> Option<calimero_node_primitives::delta_buffer::PushResult> {
        use calimero_node_primitives::delta_buffer::PushResult;

        if let Some(mut session) = self.sync_sessions.get_mut(context_id) {
            let incoming_delta_id = delta.id;
            let result = session.delta_buffer.push(delta);

            if result.had_data_loss() {
                // A delta was lost - log rate-limited warning
                let should_warn = session.last_drop_warning.is_none_or(|last| {
                    last.elapsed()
                        > Duration::from_secs(constants::DELTA_BUFFER_DROP_WARNING_RATE_LIMIT_S)
                });

                if should_warn {
                    session.last_drop_warning = Some(Instant::now());
                    let (evicted_id, reason) = match &result {
                        PushResult::Evicted(id) => (id, "buffer overflow"),
                        PushResult::DroppedZeroCapacity(id) => (id, "zero capacity"),
                        _ => unreachable!(),
                    };
                    warn!(
                        %context_id,
                        lost_delta_id = ?evicted_id,
                        incoming_delta_id = ?incoming_delta_id,
                        reason = reason,
                        drops = session.delta_buffer.drops(),
                        buffer_size = session.delta_buffer.len(),
                        capacity = session.delta_buffer.capacity(),
                        "Delta buffer data loss - {} (I6 violation risk)",
                        reason
                    );
                }

                // TODO (#4): Export drops to Prometheus metrics
                // metrics::counter!("calimero_sync_buffer_drops", "context_id" => context_id.to_string()).increment(1);
            }

            Some(result)
        } else {
            None // No active session
        }
    }

    /// Start a sync session for a context (enables delta buffering).
    ///
    /// Buffer capacity defaults to 10,000 deltas per context.
    pub(crate) fn start_sync_session(&self, context_id: ContextId, sync_start_hlc: u64) {
        self.start_sync_session_with_capacity(
            context_id,
            sync_start_hlc,
            calimero_node_primitives::delta_buffer::DEFAULT_BUFFER_CAPACITY,
        );
    }

    /// Start a sync session with custom buffer capacity.
    ///
    /// # Capacity Warning (#7)
    ///
    /// If capacity is below `MIN_RECOMMENDED_CAPACITY`, a warning is logged.
    /// Zero capacity is valid but will drop ALL deltas.
    pub(crate) fn start_sync_session_with_capacity(
        &self,
        context_id: ContextId,
        sync_start_hlc: u64,
        capacity: usize,
    ) {
        use calimero_node_primitives::delta_buffer::{DeltaBuffer, MIN_RECOMMENDED_CAPACITY};

        // (#7) Warn if capacity is below recommended minimum
        if capacity < MIN_RECOMMENDED_CAPACITY {
            warn!(
                %context_id,
                capacity,
                min_recommended = MIN_RECOMMENDED_CAPACITY,
                "Delta buffer capacity below recommended minimum - may cause excessive data loss"
            );
        }

        debug!(
            %context_id,
            sync_start_hlc,
            capacity,
            "Starting sync session with delta buffering"
        );

        self.sync_sessions.insert(
            context_id,
            SyncSession {
                state: SyncSessionState::BufferingDeltas,
                delta_buffer: DeltaBuffer::new(capacity, sync_start_hlc),
                last_drop_warning: None,
            },
        );
    }

    /// End a sync session and return buffered deltas for replay.
    ///
    /// Call this after sync completes successfully. Buffered deltas should be
    /// replayed in FIFO order to preserve causality.
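    ///
    /// A sketch of the full session lifecycle around a snapshot sync
    /// (`run_snapshot_sync` and `apply_delta` are placeholders):
    ///
    /// ```ignore
    /// state.start_sync_session(context_id, sync_start_hlc);
    /// match run_snapshot_sync(context_id).await {
    ///     Ok(()) => {
    ///         // Replay buffered deltas in FIFO order to preserve causality.
    ///         if let Some(buffered) = state.end_sync_session(&context_id) {
    ///             for delta in buffered {
    ///                 apply_delta(delta);
    ///             }
    ///         }
    ///     }
    ///     Err(_) => state.cancel_sync_session(&context_id),
    /// }
    /// ```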
    pub(crate) fn end_sync_session(
        &self,
        context_id: &ContextId,
    ) -> Option<Vec<calimero_node_primitives::delta_buffer::BufferedDelta>> {
        if let Some((_, mut session)) = self.sync_sessions.remove(context_id) {
            let drops = session.delta_buffer.drops();
            let buffered_count = session.delta_buffer.len();

            if drops > 0 {
                warn!(
                    %context_id,
                    drops,
                    buffered_count,
                    "Sync session ended with {} dropped deltas (I6 partial violation)",
                    drops
                );
            } else {
                debug!(
                    %context_id,
                    buffered_count,
                    "Sync session ended successfully"
                );
            }

            Some(session.delta_buffer.drain())
        } else {
            None
        }
    }

    /// Cancel a sync session and discard buffered deltas.
    ///
    /// Call this on sync error/failure. Buffered deltas are discarded since
    /// the sync didn't complete and the context state may be inconsistent.
    pub(crate) fn cancel_sync_session(&self, context_id: &ContextId) {
        if let Some((_, session)) = self.sync_sessions.remove(context_id) {
            let drops = session.delta_buffer.drops();
            let buffered_count = session.delta_buffer.len();

            warn!(
                %context_id,
                buffered_count,
                drops,
                "Sync session cancelled - discarding buffered deltas"
            );
        }
    }

    /// Evict blobs from cache based on age, count, and memory limits
    pub(crate) fn evict_old_blobs(&self) {
        let now = Instant::now();
        let before_count = self.blob_cache.len();

        // Phase 1: Remove blobs older than MAX_BLOB_AGE
        self.blob_cache.retain(|_, cached_blob| {
            now.duration_since(cached_blob.last_accessed)
                < Duration::from_secs(constants::MAX_BLOB_AGE_S)
        });

        let after_time_eviction = self.blob_cache.len();

        // Phase 2: If still over count limit, remove least recently used
        if self.blob_cache.len() > constants::MAX_BLOB_CACHE_COUNT {
            let mut blobs: Vec<_> = self
                .blob_cache
                .iter()
                .map(|entry| (*entry.key(), entry.value().last_accessed))
                .collect();

            // Sort by last_accessed (oldest first)
            blobs.sort_by_key(|(_, accessed)| *accessed);

            // Remove oldest until under count limit
            let to_remove = self.blob_cache.len() - constants::MAX_BLOB_CACHE_COUNT;
            for (blob_id, _) in blobs.iter().take(to_remove) {
                let _removed = self.blob_cache.remove(blob_id);
            }
        }

        let after_count_eviction = self.blob_cache.len();

        // Phase 3: If still over memory limit, remove by LRU until under budget
        let total_size: usize = self
            .blob_cache
            .iter()
            .map(|entry| entry.value().data.len())
            .sum();

        if total_size > constants::MAX_BLOB_CACHE_SIZE_BYTES {
            let mut blobs: Vec<_> = self
                .blob_cache
                .iter()
                .map(|entry| {
                    (
                        *entry.key(),
                        entry.value().last_accessed,
                        entry.value().data.len(),
                    )
                })
                .collect();

            // Sort by last_accessed (oldest first)
            blobs.sort_by_key(|(_, accessed, _)| *accessed);

            let mut current_size = total_size;
            let mut removed_count = 0;

            for (blob_id, _, size) in blobs {
                if current_size <= constants::MAX_BLOB_CACHE_SIZE_BYTES {
                    break;
                }
                let _removed = self.blob_cache.remove(&blob_id);
                current_size = current_size.saturating_sub(size);
                removed_count += 1;
            }

            if removed_count > 0 {
                #[expect(
                    clippy::integer_division,
                    reason = "MB conversion for logging, precision not critical"
                )]
                let freed_mb = total_size.saturating_sub(current_size) / 1024 / 1024;
                #[expect(
                    clippy::integer_division,
                    reason = "MB conversion for logging, precision not critical"
                )]
                let new_size_mb = current_size / 1024 / 1024;
                tracing::debug!(
                    removed_count,
                    freed_mb,
                    new_size_mb,
                    "Evicted blobs to stay under memory limit"
                );
            }
        }

        let total_evicted = before_count.saturating_sub(self.blob_cache.len());
        if total_evicted > 0 {
            tracing::debug!(
                total_evicted,
                time_evicted = before_count.saturating_sub(after_time_eviction),
                count_evicted = after_time_eviction.saturating_sub(after_count_eviction),
                memory_evicted = after_count_eviction.saturating_sub(self.blob_cache.len()),
                remaining_count = self.blob_cache.len(),
                "Blob cache eviction completed"
            );
        }
    }
}