calimero-node 0.10.1-rc.38

//! Sync manager and orchestration.
//!
//! **Purpose**: Coordinates periodic syncs, selects peers, and delegates to protocols.
//! **Strategy**: Try delta sync first; fall back to state sync on failure.

use std::collections::HashMap;
use std::pin::pin;
use std::sync::Arc;

use calimero_context_client::client::ContextClient;
use calimero_crypto::{Nonce, SharedKey};
use calimero_network_primitives::client::NetworkClient;
use calimero_network_primitives::stream::Stream;
use calimero_node_primitives::client::{NamespaceJoinParams, NodeClient, OpenSubgroupJoinParams};
use calimero_node_primitives::join_bundle::JoinBundle;
use calimero_node_primitives::sync::{InitPayload, MessagePayload, StreamMessage};
use calimero_primitives::common::DIGEST_SIZE;
use calimero_primitives::context::ContextId;
use calimero_primitives::identity::PublicKey;
use eyre::bail;
use eyre::WrapErr;
use futures_util::stream::{self};
use futures_util::StreamExt;
use libp2p::gossipsub::TopicHash;
use libp2p::PeerId;
use rand::seq::SliceRandom;
use rand::Rng;
use tokio::sync::{mpsc, oneshot};
use tokio::time::{self, Instant, MissedTickBehavior};
use tracing::{debug, error, info, warn};

use crate::sync_session_bridge::{
    SyncSessionJob, SyncSessionResult, SyncSessionSendError, SyncSessionSender,
};
use crate::utils::choose_stream;

use super::config::SyncConfig;
use super::tracking::SyncState;
// Internal SyncProtocol for metrics (3 variants)
use super::tracking::SyncProtocol as TrackingSyncProtocol;
// Full SyncProtocol from primitives (imported further down from
// `calimero_node_primitives::sync`) for protocol selection (7 variants, CIP §2.3).
// Uses shared state machine types for consistent behavior with simulation.
use super::hash_comparison_protocol::{HashComparisonConfig, HashComparisonProtocol};
use super::level_sync::{LevelWiseConfig, LevelWiseProtocol};
use calimero_node_primitives::sync::{
    build_handshake_from_raw, estimate_entity_count, estimate_max_depth, select_protocol,
    SyncHandshake, SyncProtocol, SyncProtocolExecutor,
};

/// Network synchronization manager.
///
/// Orchestrates sync protocols: full resync, delta sync, state sync.
///
/// The receiver fields are `Option`s because they are single-consumer:
/// [`SyncManager::start`] `take()`s them, and the hand-written `Clone`
/// impl sets them to `None` on every clone.
pub struct SyncManager {
    /// Sync tuning parameters. `start` reads `frequency` (tick period),
    /// `interval` (minimum gap between syncs of one context) and
    /// `session_deadline` (basis for the watchdog grace) from it.
    pub(crate) sync_config: SyncConfig,

    /// Node client handle (cloned into every `SyncManager` clone).
    pub(super) node_client: NodeClient,
    /// Context client; used to enumerate context ids, look up contexts
    /// and obtain the datastore handle.
    pub(super) context_client: ContextClient,
    /// Network client handle (cloned into every `SyncManager` clone).
    pub(crate) network_client: NetworkClient,
    /// Shared node state handle (cloned into every `SyncManager` clone).
    pub(super) node_state: crate::NodeState,

    /// Context sync requests as `(context, peer)`. In `start`, a `None`
    /// context means "sync every known context"; the peer is forwarded
    /// unchanged to the dispatched initiator job.
    pub(super) ctx_sync_rx: Option<mpsc::Receiver<(Option<ContextId>, Option<PeerId>)>>,
    /// Namespace ids for which `start` runs a namespace governance sync.
    pub(super) ns_sync_rx: Option<mpsc::Receiver<[u8; 32]>>,
    /// Namespace join requests, each paired with the one-shot channel
    /// `start` uses to send back the resulting [`JoinBundle`] (or error).
    pub(super) ns_join_rx: Option<
        mpsc::Receiver<(
            NamespaceJoinParams,
            oneshot::Sender<eyre::Result<JoinBundle>>,
        )>,
    >,
    /// Open-subgroup join requests, each paired with the one-shot channel
    /// `start` uses to send back the resulting bytes (or error).
    pub(super) open_subgroup_join_rx: Option<
        mpsc::Receiver<(
            OpenSubgroupJoinParams,
            oneshot::Sender<eyre::Result<Vec<u8>>>,
        )>,
    >,

    /// Dispatch handle for the dedicated `SyncSessionActor` (#2316).
    /// Set via [`SyncManager::set_session_handles`] after the actor is
    /// started; `None` on freshly-cloned instances (which never run
    /// the `start` loop) and on the original until wiring completes.
    pub(super) session_tx: Option<SyncSessionSender>,
    /// Channel the `SyncSessionActor` writes initiator results into so
    /// `start` can update per-context tracking state. Consumed once by
    /// `start`; `None` on clones.
    pub(super) session_result_rx: Option<mpsc::UnboundedReceiver<SyncSessionResult>>,

    /// Sync-protocol metrics collector. Installed by `run.rs::start` via
    /// [`SyncManager::set_metrics`] after the [`crate::sync::PrometheusSyncMetrics`]
    /// instance is registered against the global registry. `None` means
    /// recording sites use [`crate::sync::no_op_metrics`] as a silent
    /// fallback — never a panic and never a runtime cost beyond a vtable
    /// no-op.
    ///
    /// `dyn SyncMetricsCollector` does not implement `Debug`, so we
    /// hand-write a `Debug` impl on `SyncManager` (below) that prints
    /// only the presence/absence of this field — the inner vtable is
    /// opaque anyway.
    pub(crate) metrics: Option<Arc<dyn super::metrics::SyncMetricsCollector>>,
}

impl std::fmt::Debug for SyncManager {
    /// Prints the sync configuration and whether a metrics collector is
    /// installed; every other field is elided (non-exhaustive output).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The collector is a trait object without `Debug`, so only its
        // presence is reported.
        let metrics_installed = self.metrics.is_some();

        let mut repr = f.debug_struct("SyncManager");
        repr.field("sync_config", &self.sync_config);
        repr.field("metrics_installed", &metrics_installed);
        repr.finish_non_exhaustive()
    }
}

impl Clone for SyncManager {
    /// Produces a "worker" copy: all client/state handles and the metrics
    /// handle are shared, while every single-consumer channel endpoint is
    /// left empty.
    fn clone(&self) -> Self {
        let Self {
            sync_config,
            node_client,
            context_client,
            network_client,
            node_state,
            metrics,
            ..
        } = self;

        Self {
            sync_config: *sync_config,
            node_client: node_client.clone(),
            context_client: context_client.clone(),
            network_client: network_client.clone(),
            node_state: node_state.clone(),
            // Request receivers belong to the one instance that runs
            // `start`; a clone never polls them.
            ctx_sync_rx: None,
            ns_sync_rx: None,
            ns_join_rx: None,
            open_subgroup_join_rx: None,
            // Likewise, clones never drive the `start` loop, so they get
            // neither the session-dispatch handle nor the results
            // receiver. The bridge keeps its own `SyncManager` clone for
            // issuing sessions.
            session_tx: None,
            session_result_rx: None,
            // The `Arc` is shared so the original (which runs `start`) and
            // every responder/initiator clone record into one collector.
            metrics: metrics.clone(),
        }
    }
}

/// Reports whether `map` holds a dispatch-attempt instant for
/// `context_id` that is younger than `interval`.
///
/// [`SyncManager::start`] records `Instant::now()` in such a map whenever
/// a dispatch to the `SyncSessionActor` is dropped (#2319) and consults
/// this function to skip the context until `interval` has passed, which
/// stops the every-tick re-attempt storm. This bookkeeping is kept
/// separate from `SyncState`'s `last_sync` on purpose: touching
/// `last_sync` for a dropped dispatch is what #2317 forbids, because the
/// context would look "in progress" forever with no result to clear it.
///
/// Returns `false` when the context has no recorded attempt.
pub(crate) fn dispatch_recently_attempted(
    map: &HashMap<ContextId, time::Instant>,
    context_id: &ContextId,
    interval: time::Duration,
) -> bool {
    match map.get(context_id) {
        Some(last_attempt) => last_attempt.elapsed() < interval,
        None => false,
    }
}

/// Reports whether the initiator dispatched for `context_id` looks wedged.
///
/// Both conditions must hold:
/// 1. `dispatched_at` has an entry for the context that is at least
///    `grace` old, and
/// 2. the context's [`SyncState`] in `state` is still "in progress"
///    (`last_sync()` is `None`), i.e. no `SyncSessionResult` cleared it.
///
/// A context missing from either map is never considered wedged.
///
/// The #2319 watchdog in [`SyncManager::start`] relies on this to spot a
/// `SyncSessionActor` whose single arbiter thread is stuck in a
/// synchronous merkle/CRDT-merge loop that the per-session
/// `tokio::time::timeout` cannot preempt. On `true` the manager
/// synthesises an `on_failure`, making the context eligible for a fresh
/// dispatch instead of logging "Sync already in progress" forever.
pub(crate) fn session_dispatch_wedged(
    dispatched_at: &HashMap<ContextId, time::Instant>,
    state: &HashMap<ContextId, SyncState>,
    context_id: &ContextId,
    grace: time::Duration,
) -> bool {
    // No dispatch record: nothing to be wedged.
    let Some(dispatched) = dispatched_at.get(context_id) else {
        return false;
    };

    // Still inside the grace window: give the session more time.
    if dispatched.elapsed() < grace {
        return false;
    }

    // Past grace: wedged only if tracking still shows it in flight.
    matches!(state.get(context_id), Some(tracked) if tracked.last_sync().is_none())
}

impl SyncManager {
    /// Builds the primary `SyncManager` from its client handles and the
    /// receiving ends of its request channels.
    ///
    /// The session-actor handles and the metrics collector start out
    /// unset; they are installed afterwards through
    /// [`SyncManager::set_session_handles`] and
    /// [`SyncManager::set_metrics`].
    pub(crate) fn new(
        sync_config: SyncConfig,
        node_client: NodeClient,
        context_client: ContextClient,
        network_client: NetworkClient,
        node_state: crate::NodeState,
        ctx_sync_rx: mpsc::Receiver<(Option<ContextId>, Option<PeerId>)>,
        ns_sync_rx: mpsc::Receiver<[u8; 32]>,
        ns_join_rx: mpsc::Receiver<(
            NamespaceJoinParams,
            oneshot::Sender<eyre::Result<JoinBundle>>,
        )>,
        open_subgroup_join_rx: mpsc::Receiver<(
            OpenSubgroupJoinParams,
            oneshot::Sender<eyre::Result<Vec<u8>>>,
        )>,
    ) -> Self {
        Self {
            // Wired later, once the session actor and metrics exist.
            session_tx: None,
            session_result_rx: None,
            metrics: None,
            // Single-consumer receivers, taken by `start`.
            ctx_sync_rx: Some(ctx_sync_rx),
            ns_sync_rx: Some(ns_sync_rx),
            ns_join_rx: Some(ns_join_rx),
            open_subgroup_join_rx: Some(open_subgroup_join_rx),
            // Configuration and shared handles.
            sync_config,
            node_client,
            context_client,
            network_client,
            node_state,
        }
    }

    /// Installs the `SyncSessionActor` handles on this instance: the
    /// dispatch sender and the receiver for initiator results.
    ///
    /// Intended for the original `SyncManager`, after the actor has been
    /// started in `run.rs`, and must happen before
    /// [`SyncManager::start`] — `start` logs an error and returns if
    /// either handle is missing. Clones never run the `start` loop, so
    /// there is no need to call this on them.
    pub(crate) fn set_session_handles(
        &mut self,
        session_tx: SyncSessionSender,
        session_result_rx: mpsc::UnboundedReceiver<SyncSessionResult>,
    ) {
        (self.session_tx, self.session_result_rx) = (Some(session_tx), Some(session_result_rx));
    }

    /// Installs the sync-protocol metrics collector, replacing any
    /// collector that was set before.
    ///
    /// Call this before taking clones: `Clone` copies the handle as it is
    /// at clone time, so earlier clones would not see it. Recording sites
    /// go through [`SyncManager::metrics`], which substitutes a no-op
    /// collector when nothing has been installed.
    pub(crate) fn set_metrics(&mut self, metrics: Arc<dyn super::metrics::SyncMetricsCollector>) {
        let _previous = self.metrics.replace(metrics);
    }

    /// Returns the active metrics collector.
    ///
    /// When no collector has been installed a shared no-op instance is
    /// handed out instead, so callers can always write
    /// `self.metrics().record_*()` without branching on an `Option`.
    pub(crate) fn metrics(&self) -> &dyn super::metrics::SyncMetricsCollector {
        if let Some(installed) = self.metrics.as_deref() {
            return installed;
        }

        // Process-wide fallback, initialised lazily on first use so no
        // allocation happens per call.
        static NOOP: std::sync::OnceLock<super::metrics::NoOpMetrics> = std::sync::OnceLock::new();
        NOOP.get_or_init(super::metrics::NoOpMetrics::default)
    }

    /// Assembles the `SyncHandshake` describing this node's view of
    /// `context`, for use in protocol negotiation.
    ///
    /// Entity count and tree depth come from the Merkle tree Index via
    /// [`Self::query_tree_stats`] when that succeeds. When it returns
    /// `None` (Index not accessible), both are estimated from the root
    /// hash and the number of DAG heads instead.
    ///
    /// # Arguments
    ///
    /// * `context` - The context to summarise.
    ///
    /// # Returns
    ///
    /// A `SyncHandshake` built from the context's root hash, the
    /// (measured or estimated) entity count and depth, and its DAG heads.
    fn build_local_handshake(
        &self,
        context: &calimero_primitives::context::Context,
    ) -> SyncHandshake {
        let root_hash = *context.root_hash;
        let dag_heads = context.dag_heads.clone();

        let (entity_count, max_depth) = match self.query_tree_stats(&context.id) {
            // Real numbers from the Index give accurate protocol selection.
            Some(measured) => measured,
            // Index unavailable: fall back to the shared estimators.
            None => {
                let estimated = estimate_entity_count(root_hash, dag_heads.len());
                (estimated, estimate_max_depth(estimated))
            }
        };

        build_handshake_from_raw(root_hash, entity_count, max_depth, dag_heads)
    }

    /// Reads entity count and tree depth for `context_id` from the Merkle
    /// tree Index.
    ///
    /// Returns `Some((entity_count, max_depth))` when the root Index entry
    /// can be loaded, and `None` when it is missing or loading it fails.
    /// `entity_count` is the number of direct children of the root,
    /// clamped to at least 1; `max_depth` is currently always 1.
    fn query_tree_stats(&self, context_id: &ContextId) -> Option<(u64, u32)> {
        use calimero_node_primitives::sync::create_runtime_env;
        use calimero_storage::address::Id;
        use calimero_storage::env::with_runtime_env;
        use calimero_storage::index::Index;
        use calimero_storage::store::MainStorage;

        let datastore = self.context_client.datastore_handle().into_inner();
        // NOTE: an all-zero placeholder identity is passed because the
        // identity is unused for read-only Index queries via RuntimeEnv.
        let placeholder_identity = calimero_primitives::identity::PublicKey::from([0u8; 32]);
        let runtime_env = create_runtime_env(&datastore, *context_id, placeholder_identity);

        let root_id = Id::new(*context_id.as_ref());

        with_runtime_env(runtime_env, || {
            // A missing root Index and a failed lookup are treated alike.
            let root_index = match Index::<MainStorage>::get_index(root_id) {
                Ok(Some(index)) => index,
                Ok(None) | Err(_) => return None,
            };

            // Direct children of the root are counted as the entities.
            // At least 1 whenever the root exists, matching the fallback
            // estimation.
            let child_count = root_index.children().unwrap_or_default().len() as u64;
            let entity_count = child_count.max(1);

            // Depth is fixed at 1 (consistent with the fallback); deeper
            // trees would need a recursive traversal — tracked in #2054.
            let max_depth = 1;

            Some((entity_count, max_depth))
        })
    }

    /// Assembles a `SyncHandshake` describing a peer's state, for use in
    /// protocol negotiation.
    ///
    /// Only the peer's root hash and DAG heads are known here, so entity
    /// count and depth are derived with the shared estimators from
    /// `calimero_node_primitives::sync` — the same ones simulation
    /// (`SimNode`) uses, keeping production and simulation consistent.
    fn build_remote_handshake(
        peer_root_hash: calimero_primitives::hash::Hash,
        peer_dag_heads: &[[u8; DIGEST_SIZE]],
    ) -> SyncHandshake {
        let root_hash = *peer_root_hash;
        let estimated_entities = estimate_entity_count(root_hash, peer_dag_heads.len());

        build_handshake_from_raw(
            root_hash,
            estimated_entities,
            estimate_max_depth(estimated_entities),
            peer_dag_heads.to_vec(),
        )
    }

    /// Runs the sync manager's main loop.
    ///
    /// Consumes the manager and takes its single-consumer receivers. It
    /// logs an error and returns immediately if `ctx_sync_rx` was already
    /// taken, or if the `SyncSessionActor` handles were never installed
    /// via [`SyncManager::set_session_handles`]. Once the loop is entered
    /// it never exits.
    ///
    /// Each loop iteration waits on one of:
    /// * the periodic tick (`sync_config.frequency`), which also rolls up
    ///   mailbox-full statistics and runs the #2319 wedge watchdog;
    /// * a `SyncSessionResult`, applied to per-context tracking state;
    /// * a namespace governance sync, namespace join or open-subgroup
    ///   join request, each handled inline;
    /// * an explicit context sync request.
    ///
    /// After a tick or a context sync request, every eligible context is
    /// dispatched to the session actor as a `SyncSessionJob::Initiator`.
    pub async fn start(mut self) {
        let mut next_sync = time::interval(self.sync_config.frequency);

        next_sync.set_missed_tick_behavior(MissedTickBehavior::Delay);

        let mut state = HashMap::<_, SyncState>::new();

        // #2319: per-context "last dispatch attempt" instants. When a
        // `try_send` to the `SyncSessionActor` returns `Full`/`Closed`
        // we record the context here and skip re-dispatching it until
        // `sync_config.interval` has elapsed — otherwise every interval
        // tick (and every heartbeat-driven re-trigger) re-attempts and
        // re-drops, which is the wedge in #2319. A real
        // `SyncSessionResult` clears the entry. Distinct from
        // `SyncState.last_sync`, which #2317 forbids touching on a
        // dropped dispatch.
        let mut last_dispatch_attempt = HashMap::<ContextId, time::Instant>::new();

        // #2319: watchdog. Once an initiator is dispatched (Phase 3
        // below clears the context's `last_sync` to `None`), a
        // `SyncSessionResult` is supposed to arrive and call
        // `on_success`/`on_failure` to clear it. If the `SyncSessionActor`
        // wedges — its single arbiter thread stuck in a synchronous
        // merkle/CRDT-merge loop that the per-session `tokio::time::timeout`
        // can't preempt, or its mailbox saturated by such sessions — that
        // result never comes and the context stays "in progress" forever
        // ("Sync already in progress" on every tick, never converging
        // again). Record when we dispatched; if no result has cleared
        // the entry within `SESSION_WEDGE_GRACE` we synthesise an
        // `on_failure` so the context becomes eligible again and the
        // periodic loop retries with a fresh dispatch. The entry is
        // cleared by the real result on `session_result_rx`.
        const SESSION_WEDGE_GRACE_MULTIPLIER: u32 = 2;
        let session_wedge_grace =
            self.sync_config.session_deadline * SESSION_WEDGE_GRACE_MULTIPLIER;
        let mut initiator_dispatched_at = HashMap::<ContextId, time::Instant>::new();

        // #2319: rate-limit the "mailbox full" warn to ≤1 per context
        // per `MAILBOX_FULL_SUMMARY_WINDOW`; the rest roll up into one
        // info line per window so the wedge is still visible without
        // drowning the log.
        const MAILBOX_FULL_SUMMARY_WINDOW: time::Duration = time::Duration::from_secs(60);
        let mut last_full_warn = HashMap::<ContextId, time::Instant>::new();
        let mut full_drops_in_window: u64 = 0;
        let mut full_window_started = time::Instant::now();

        let mut requested_ctx = None;
        let mut requested_peer = None;

        let Some(mut ctx_sync_rx) = self.ctx_sync_rx.take() else {
            error!("SyncManager can only be run once");
            return;
        };
        // The optional receivers fall back to a channel whose sender is
        // dropped immediately, so their `select!` branch simply never
        // yields a request.
        let mut ns_sync_rx = self.ns_sync_rx.take().unwrap_or_else(|| {
            let (_tx, rx) = mpsc::channel(1);
            rx
        });
        let mut ns_join_rx = self.ns_join_rx.take().unwrap_or_else(|| {
            let (_tx, rx) = mpsc::channel(1);
            rx
        });
        let mut open_subgroup_join_rx = self.open_subgroup_join_rx.take().unwrap_or_else(|| {
            let (_tx, rx) = mpsc::channel(1);
            rx
        });
        let Some(session_tx) = self.session_tx.clone() else {
            error!("SyncManager started without a SyncSessionActor handle (#2316)");
            return;
        };
        let Some(mut session_result_rx) = self.session_result_rx.take() else {
            error!("SyncManager started without a SyncSessionActor result channel (#2316)");
            return;
        };

        // Apply a session result to per-context tracking state. Mirrors
        // the body of the legacy `advance` helper, which read from a
        // local `FuturesUnordered<futs>` before #2316 moved session
        // execution onto a dedicated arbiter.
        fn apply_session_result(
            state: &mut HashMap<ContextId, SyncState>,
            result: SyncSessionResult,
        ) {
            let SyncSessionResult {
                context_id,
                peer_id,
                took,
                result,
            } = result;

            // Results for contexts without tracking state are ignored.
            let _ignored = state.entry(context_id).and_modify(|state| match result {
                Ok(Ok(ref protocol)) => {
                    state.on_success(peer_id, TrackingSyncProtocol::from(protocol));
                    info!(
                        %context_id,
                        ?took,
                        ?protocol,
                        success_count = state.success_count,
                        "Sync finished successfully"
                    );
                }
                Ok(Err(ref err)) => {
                    state.on_failure(err.to_string());
                    warn!(
                        %context_id,
                        ?took,
                        error = %err,
                        failure_count = state.failure_count(),
                        backoff_secs = state.backoff_delay().as_secs(),
                        "Sync failed, applying exponential backoff"
                    );
                }
                Err(ref timeout_err) => {
                    state.on_failure(timeout_err.to_string());
                    warn!(
                        %context_id,
                        ?took,
                        failure_count = state.failure_count(),
                        backoff_secs = state.backoff_delay().as_secs(),
                        "Sync timed out, applying exponential backoff"
                    );
                }
            });
        }

        loop {
            tokio::select! {
                _ = next_sync.tick() => {
                    debug!("Performing interval sync");
                    // #2319: roll up rate-limited mailbox-full drops.
                    if full_window_started.elapsed() >= MAILBOX_FULL_SUMMARY_WINDOW {
                        if full_drops_in_window > 0 {
                            info!(
                                full_drops_in_window,
                                contexts_affected = last_full_warn.len(),
                                "SyncSession mailbox-full drop rollup (last {:?}) (#2319)",
                                MAILBOX_FULL_SUMMARY_WINDOW
                            );
                        }
                        full_drops_in_window = 0;
                        full_window_started = time::Instant::now();
                        last_full_warn.retain(|_, t| t.elapsed() < MAILBOX_FULL_SUMMARY_WINDOW);
                    }
                    // #2319 watchdog: any context whose initiator was
                    // dispatched longer than `session_wedge_grace` ago
                    // and is still flagged "in progress" — its session
                    // (or the whole `SyncSessionActor`) is wedged and no
                    // `SyncSessionResult` is coming. Synthesise a
                    // failure so the context is eligible again; the
                    // dispatch loop below will retry it this tick.
                    let wedged: Vec<ContextId> = initiator_dispatched_at
                        .keys()
                        .copied()
                        .filter(|context_id| {
                            session_dispatch_wedged(
                                &initiator_dispatched_at,
                                &state,
                                context_id,
                                session_wedge_grace,
                            )
                        })
                        .collect();
                    // Drop exactly the dispatch records we are about to
                    // fail.
                    for context_id in &wedged {
                        let _removed = initiator_dispatched_at.remove(context_id);
                    }
                    // Tidy up past-grace records whose context is no
                    // longer in progress (a result normally removes
                    // them already). A record that is still in progress
                    // is deliberately KEPT even when past grace: the
                    // scan above and this pass each read the clock, so
                    // an entry can cross the grace boundary in between.
                    // Pruning it here without failing it would leave
                    // the context "in progress" with no watchdog record
                    // — the very stall this watchdog prevents. Keeping
                    // it lets the next tick fail it.
                    initiator_dispatched_at.retain(|context_id, dispatched_at| {
                        dispatched_at.elapsed() < session_wedge_grace
                            || state
                                .get(context_id)
                                .is_some_and(|s| s.last_sync().is_none())
                    });
                    for context_id in wedged {
                        warn!(
                            %context_id,
                            grace = ?session_wedge_grace,
                            "SyncSession initiator produced no result within watchdog grace — assuming a wedged session/actor; failing it so periodic-sync retries (#2319)"
                        );
                        if let Some(s) = state.get_mut(&context_id) {
                            s.on_failure(
                                "sync session wedged — no SyncSessionResult within watchdog grace (#2319)"
                                    .to_owned(),
                            );
                        }
                    }
                }
                Some(result) = session_result_rx.recv() => {
                    // #2319: a real result means this context is no
                    // longer wedged behind a full mailbox — drop the
                    // dispatch-attempt backoff so it isn't throttled —
                    // and the watchdog timer for it is satisfied.
                    let _removed = last_dispatch_attempt.remove(&result.context_id);
                    let _removed = initiator_dispatched_at.remove(&result.context_id);
                    apply_session_result(&mut state, result);
                    continue;
                }
                Some(namespace_id) = ns_sync_rx.recv() => {
                    info!(
                        namespace_id = %hex::encode(namespace_id),
                        "Performing namespace governance sync"
                    );
                    self.sync_namespace_from_peer(namespace_id).await;
                    continue;
                }
                Some((params, reply_tx)) = ns_join_rx.recv() => {
                    info!(
                        namespace_id = %hex::encode(params.namespace_id),
                        "Processing namespace join request (initiator side)"
                    );
                    let result = self.initiate_namespace_join(params).await;
                    let _ignored = reply_tx.send(result);
                    continue;
                }
                Some((params, reply_tx)) = open_subgroup_join_rx.recv() => {
                    info!(
                        namespace_id = %hex::encode(params.namespace_id),
                        subgroup_id = %hex::encode(params.subgroup_id),
                        "Processing open-subgroup join request (initiator side)"
                    );
                    let result = self.initiate_open_subgroup_join(params).await;
                    let _ignored = reply_tx.send(result);
                    continue;
                }
                Some((ctx, peer)) = ctx_sync_rx.recv() => {
                    info!(?ctx, ?peer, "Received sync request");

                    requested_ctx = ctx;
                    requested_peer = peer;

                    // CRITICAL FIX: Drain all other pending sync requests in the queue.
                    // When multiple contexts join rapidly (common in E2E tests), they all
                    // call sync() which queues requests in ctx_sync_rx. The old code only
                    // processed ONE request per loop iteration, leaving contexts 2-N queued
                    // indefinitely. This caused those contexts to never sync and remain
                    // with dag_heads=[] and Uninitialized errors.
                    //
                    // Solution: Use try_recv() to drain all buffered requests immediately,
                    // then trigger a full sync that will process all contexts.
                    let mut drained_count = 0;
                    while ctx_sync_rx.try_recv().is_ok() {
                        drained_count += 1;
                    }

                    if drained_count > 0 {
                        info!(drained_count, "Drained additional sync requests from queue, will sync all contexts");
                        // Clear requested_ctx to force syncing ALL contexts
                        // This ensures newly-joined contexts get synced even if they weren't first in queue
                        requested_ctx = None;
                        requested_peer = None;
                    }
                }
            }

            // Only the tick and context-request branches reach this
            // point; the other branches `continue` above.
            let requested_ctx = requested_ctx.take();
            let requested_peer = requested_peer.take();

            // No explicit context requested: walk every known context.
            let contexts = requested_ctx
                .is_none()
                .then(|| self.context_client.get_context_ids(None));

            let contexts = stream::iter(requested_ctx)
                .map(Ok)
                .chain(stream::iter(contexts).flatten());

            let mut contexts = pin!(contexts);

            while let Some(context_id) = contexts.next().await {
                let context_id = match context_id {
                    Ok(context_id) => context_id,
                    Err(err) => {
                        error!(%err, "Failed reading context id to sync");
                        continue;
                    }
                };

                // #2319: respect the dispatch-attempt backoff. After a
                // `Full` mailbox we wait one `interval` before retrying
                // this context rather than re-attempting (and re-dropping)
                // on every tick. Explicit requests bypass it, same as the
                // recency override below.
                if requested_ctx.is_none()
                    && dispatch_recently_attempted(
                        &last_dispatch_attempt,
                        &context_id,
                        self.sync_config.interval,
                    )
                {
                    debug!(%context_id, "Skipping sync — dispatch recently attempted, mailbox was full (#2319)");
                    continue;
                }

                // Phase 1: read-only eligibility check. We must not
                // mutate `state` here because a failed `try_send`
                // below would leave `last_sync = None` with no future
                // result to clear it — permanently stalling the
                // context (Cursor bugbot #2317).
                let is_first_sync = match state.get(&context_id) {
                    Some(existing) => {
                        let Some(last_sync) = existing.last_sync() else {
                            debug!(%context_id, "Sync already in progress");
                            continue;
                        };

                        let minimum = self.sync_config.interval;
                        let time_since = last_sync.elapsed();

                        if time_since < minimum {
                            if requested_ctx.is_none() {
                                debug!(%context_id, ?time_since, ?minimum, "Skipping sync, last one was too recent");
                                continue;
                            }

                            debug!(%context_id, ?time_since, ?minimum, "Force syncing despite recency, due to explicit request");
                        }
                        false
                    }
                    None => true,
                };

                info!(%context_id, "Scheduled sync");

                // Phase 2: dispatch BEFORE mutating state — so a
                // `Full`/`Closed` outcome leaves the per-context
                // tracking state untouched and the next interval
                // tick (or heartbeat trigger) just retries.
                let dispatched = match session_tx.try_send(SyncSessionJob::Initiator {
                    context_id,
                    peer_id: requested_peer,
                }) {
                    Ok(()) => true,
                    Err(SyncSessionSendError::Full) => {
                        full_drops_in_window += 1;
                        let warn_now = last_full_warn
                            .get(&context_id)
                            .is_none_or(|t| t.elapsed() >= MAILBOX_FULL_SUMMARY_WINDOW);
                        if warn_now {
                            warn!(
                                %context_id,
                                "SyncSession actor mailbox full — skipping initiator dispatch; backing off this context for {:?} (#2316/#2319)",
                                self.sync_config.interval
                            );
                            let _prev = last_full_warn.insert(context_id, time::Instant::now());
                        } else {
                            debug!(%context_id, "SyncSession actor mailbox full — skipping (rate-limited; see periodic rollup) (#2319)");
                        }
                        false
                    }
                    Err(SyncSessionSendError::Closed) => {
                        warn!(
                            %context_id,
                            "SyncSession actor closed — skipping initiator dispatch"
                        );
                        false
                    }
                };

                if !dispatched {
                    // #2319: record the failed attempt so the next
                    // interval tick backs off instead of re-dropping.
                    let _prev = last_dispatch_attempt.insert(context_id, time::Instant::now());
                    continue;
                }

                // Phase 3: dispatch succeeded — mark the context as
                // in-flight. A `SyncSessionResult` will arrive on
                // `session_result_rx` and call `on_success` /
                // `on_failure` to clear the flag — or, if it never does,
                // the #2319 watchdog above fails it after the grace.
                let _prev = initiator_dispatched_at.insert(context_id, time::Instant::now());
                if is_first_sync {
                    info!(%context_id, "Syncing for the first time");
                    let mut new_state = SyncState::new();
                    new_state.start();
                    let _ignored = state.insert(context_id, new_state);
                } else if let Some(existing) = state.get_mut(&context_id) {
                    let _ignored = existing.take_last_sync();
                }
            }
        }
    }

    /// Runs one sync attempt for `context_id` and returns the peer that was
    /// synced with together with the protocol that was used.
    ///
    /// * When `peer_id` is `Some`, that peer is used directly and no peer
    ///   discovery takes place.
    /// * Otherwise the context-topic gossipsub mesh is polled for peers (with
    ///   a longer retry window while the local root hash is all zeros). If
    ///   that yields nothing, mesh peers of the namespace-root topic
    ///   (`ns/<hex id>`) are used as a fallback.
    /// * An uninitialized node first probes for a peer that already has state
    ///   and, if one is found, returns the outcome of syncing with it.
    /// * In every other case peers are tried one at a time in shuffled,
    ///   anchor-first order until one sync succeeds.
    ///
    /// # Errors
    ///
    /// Fails when the context is unknown locally, when no peers can be found,
    /// or when every candidate peer fails. In the last case the most recent
    /// peer failure is kept as the cause of the returned error; the top-level
    /// message is unchanged.
    pub(crate) async fn perform_interval_sync(
        &self,
        context_id: ContextId,
        peer_id: Option<PeerId>,
    ) -> eyre::Result<(PeerId, SyncProtocol)> {
        if let Some(peer_id) = peer_id {
            return self.initiate_sync(context_id, peer_id).await;
        }

        // Check if we're uninitialized before peer discovery so we can use
        // a longer mesh wait window for bootstrap scenarios.
        let context = self
            .context_client
            .get_context(&context_id)?
            .ok_or_else(|| eyre::eyre!("Context not found: {}", context_id))?;

        let is_uninitialized = *context.root_hash == [0; 32];

        // Retry peer discovery if mesh is still forming.
        // Uninitialized nodes need a longer wait window (10s vs 1.5s) to avoid
        // getting stuck before first snapshot sync. Gossipsub mesh takes 5-10
        // heartbeats (~5-10s) to add a new subscriber after topic subscription.
        let (max_retries, retry_delay_ms) = if is_uninitialized {
            (
                super::config::DEFAULT_MESH_RETRIES_UNINITIALIZED,
                super::config::DEFAULT_MESH_RETRY_DELAY_MS_UNINITIALIZED,
            )
        } else {
            (
                super::config::DEFAULT_MESH_RETRIES_INITIALIZED,
                super::config::DEFAULT_MESH_RETRY_DELAY_MS_INITIALIZED,
            )
        };

        let mesh_discovery_start = Instant::now();
        let mut peers = Vec::new();
        let mut final_attempt = 0u32;
        for attempt in 1..=max_retries {
            final_attempt = attempt;
            peers = self
                .network_client
                .mesh_peers(TopicHash::from_raw(context_id))
                .await;

            if !peers.is_empty() {
                break;
            }

            // Only sleep between attempts, never after the last one.
            if attempt < max_retries {
                debug!(
                    %context_id,
                    attempt,
                    is_uninitialized,
                    max_retries,
                    "No peers found yet, mesh may still be forming, retrying..."
                );
                time::sleep(std::time::Duration::from_millis(retry_delay_ms)).await;
            }
        }
        let mesh_elapsed = mesh_discovery_start.elapsed();

        if peers.is_empty() {
            // Gossipsub mesh for the context topic hasn't formed yet (takes 5-10
            // heartbeats / ~5-10s after subscription). Try namespace mesh peers as
            // a fallback: the namespace topic's mesh is established during join
            // with a 2-second grace period, so namespace peers are available even
            // when the context-specific gossipsub mesh is still being built.
            // Direct-stream context sync works over any connected P2P peer.
            //
            // `get_context_group_id` returns the context's IMMEDIATE owning group,
            // which for a context owned by a subgroup is the subgroup id, not the
            // namespace root. Only namespace roots ever have `ns/<id>` topics
            // subscribed (see `NodeClient::subscribe_namespace`), so we must walk
            // up the parent chain to find the root before computing the fallback
            // topic. Without this walk, contexts owned by subgroups always get 0
            // peers from the fallback and sync fails during the 5-10s cold-start
            // window. `resolve_namespace` on a root group is a no-op (returns the
            // same id), so behaviour for namespace-root-owned contexts is
            // unchanged.
            if let Ok(Some(group_id)) = self.context_client.get_context_group_id(&context_id) {
                let store = self.context_client.datastore_handle().into_inner();
                let ns_id_bytes = calimero_context::group_store::resolve_namespace(
                    &store,
                    &calimero_context_config::types::ContextGroupId::from(group_id),
                )
                .map(|id| id.to_bytes())
                .unwrap_or_else(|err| {
                    // Errors here are rare and always indicate something worth
                    // investigating: store I/O failure or a circular parent chain
                    // exceeding MAX_NAMESPACE_DEPTH. Surface them before falling
                    // back so this debugging-focused code path doesn't hide real
                    // data-integrity bugs. Falling back to the immediate owning
                    // group preserves pre-fix behaviour rather than aborting the
                    // whole sync attempt.
                    warn!(
                        %context_id,
                        %err,
                        "failed to resolve namespace root for fallback topic; \
                         using immediate group id as best-effort"
                    );
                    group_id
                });

                let ns_topic = TopicHash::from_raw(format!("ns/{}", hex::encode(ns_id_bytes)));
                let ns_peers = self.network_client.mesh_peers(ns_topic).await;
                if !ns_peers.is_empty() {
                    info!(
                        %context_id,
                        peer_count = ns_peers.len(),
                        is_uninitialized,
                        "context gossipsub mesh not ready; falling back to namespace mesh peers"
                    );
                    peers = ns_peers;
                }
            }
        }

        if peers.is_empty() {
            warn!(
                %context_id,
                is_uninitialized,
                attempts = max_retries,
                ?mesh_elapsed,
                "Mesh peer discovery exhausted all retries"
            );
            bail!("No peers to sync with for context {}", context_id);
        }

        info!(
            %context_id,
            peer_count = peers.len(),
            attempts = final_attempt,
            ?mesh_elapsed,
            is_uninitialized,
            peers = ?peers,
            "Mesh peer discovery succeeded"
        );

        if is_uninitialized {
            // When uninitialized, we need to bootstrap from a peer that HAS data
            // Trying random peers can result in querying other uninitialized nodes
            info!(
                %context_id,
                peer_count = peers.len(),
                "Node is uninitialized, selecting peer with state for bootstrapping"
            );

            // Try to find a peer with actual state
            match self.find_peer_with_state(context_id, &peers).await {
                Ok(peer_id) => {
                    info!(%context_id, %peer_id, "Found peer with state, syncing from them");
                    return self.initiate_sync(context_id, peer_id).await;
                }
                Err(e) => {
                    warn!(%context_id, error = %e, "Failed to find peer with state, falling back to random selection");
                    // Fall through to random selection
                }
            }
        }

        // Normal sync: try peers serially. Parallelising `initiate_sync` for
        // the same context is unsafe — the sync protocol mutates per-context
        // state (sync-in-progress marker at snapshot.rs:581, sync sessions at
        // state.rs:235, snapshot-page cleanup in `request_and_apply_snapshot_pages`
        // which documents "assumes no concurrent writes") and futures cancelled
        // mid-flight can leak a sync session into the DashMap, causing
        // `should_buffer_delta` to return true permanently. Tail-latency
        // benefit is still obtained from the parallel probe above, which
        // narrows this loop to "try a known-good peer first".
        //
        // Peer order: random shuffle, then stable-partition so peers we
        // have observed signing applied messages with an
        // Owner/Admin/ReadOnlyTee identity come first. Anchors are the
        // peers whose canonical view is authoritative — targeting them
        // first reduces the chance of pulling from a peer that's
        // behind or divergent. Plain members still get tried if all
        // anchors fail. Empty cache or context with no observed anchor
        // peers degrades to plain random selection.
        let mut shuffled: Vec<libp2p::PeerId> = peers
            .choose_multiple(&mut rand::thread_rng(), peers.len())
            .copied()
            .collect();
        let anchor_count = partition_peers_anchor_first(
            &mut shuffled,
            &self.node_state.peer_identities,
            &self.anchor_identities_for_context(&context_id),
        );
        if anchor_count > 0 {
            debug!(
                %context_id,
                anchor_peer_count = anchor_count,
                non_anchor_peer_count = shuffled.len() - anchor_count,
                "Preferring anchor peers for sync"
            );
        } else {
            debug!(
                %context_id,
                peer_count = shuffled.len(),
                "No anchor peers connected — falling back to random selection"
            );
        }

        // Remember the most recent failure so the final error carries a cause
        // instead of discarding every per-peer error.
        let mut last_err: Option<eyre::Report> = None;
        for peer_id in &shuffled {
            match self.initiate_sync(context_id, *peer_id).await {
                Ok(result) => return Ok(result),
                Err(err) => last_err = Some(err),
            }
        }

        let message = format!("Failed to sync with any peer for context {}", context_id);
        match last_err {
            // Same top-level message as before; the peer error becomes its source.
            Some(err) => Err(err.wrap_err(message)),
            None => Err(eyre::eyre!("{}", message)),
        }
    }

    /// Resolve the trusted-anchor identities for the group owning
    /// `context_id` by mapping the context to its group and delegating to
    /// [`Self::anchor_identities_for_group`].
    ///
    /// Anything other than a successful `Some(group)` lookup — a lookup
    /// error or a context with no group mapping — yields an empty set, which
    /// callers treat as "no anchors known" and handle by falling back to
    /// plain random peer selection.
    fn anchor_identities_for_context(
        &self,
        context_id: &ContextId,
    ) -> std::collections::BTreeSet<calimero_primitives::identity::PublicKey> {
        let store = self.context_client.datastore_handle().into_inner();
        match calimero_context::group_store::get_group_for_context(&store, context_id) {
            Ok(Some(owning_group)) => self.anchor_identities_for_group(&owning_group),
            _ => std::collections::BTreeSet::new(),
        }
    }

    /// Fetch the trusted-anchor identity set for `group_id` straight from the
    /// group store.
    ///
    /// Use this instead of [`Self::anchor_identities_for_context`] whenever
    /// the group id is already known: late-joiner nodes can lack the
    /// context→group mapping, in which case the context-keyed lookup comes
    /// back empty even though the group's anchors are available locally.
    ///
    /// A failed store lookup collapses to the default (empty) set.
    fn anchor_identities_for_group(
        &self,
        group_id: &calimero_context_config::types::ContextGroupId,
    ) -> std::collections::BTreeSet<calimero_primitives::identity::PublicKey> {
        let anchors = calimero_context::group_store::trusted_anchors_for_group(
            &self.context_client.datastore_handle().into_inner(),
            group_id,
        );
        anchors.unwrap_or_default()
    }

    /// Find a peer that reports state for `context_id`, i.e. a non-zero
    /// `root_hash` in its DAG-heads response.
    ///
    /// DAG heads are logged but are NOT part of the decision: a peer with a
    /// non-zero root hash and an empty `dag_heads` list still counts as
    /// having state (migrated/legacy contexts may have no heads).
    ///
    /// This is critical for bootstrapping newly joined nodes. Without this,
    /// uninitialized nodes may query other uninitialized nodes, resulting in
    /// all nodes remaining uninitialized.
    ///
    /// Peers are probed concurrently so a single slow/unreachable peer no
    /// longer stalls the entire discovery. The first peer to report state
    /// wins and remaining probes are cancelled when this function returns.
    /// The number of in-flight probes is `sync_config.peer_state_probe_concurrency`,
    /// capped at `peers.len()` and never below 1; each probe's receive is
    /// given a budget of `sync_config.timeout / 6`.
    ///
    /// # Errors
    ///
    /// Fails when the owned-identity lookup errors or yields no identity for
    /// the context, and when no probed peer reports state. Failures of
    /// individual probes are only logged at debug level.
    async fn find_peer_with_state(
        &self,
        context_id: ContextId,
        peers: &[PeerId],
    ) -> eyre::Result<PeerId> {
        use calimero_node_primitives::sync::{InitPayload, MessagePayload, StreamMessage};

        // Get our identity for handshake
        let identities = self
            .context_client
            .get_context_members(&context_id, Some(true));

        let Some((our_identity, _)) = choose_stream(identities, &mut rand::thread_rng())
            .await
            .transpose()?
        else {
            bail!("no owned identities found for context: {}", context_id);
        };

        // Per-probe receive budget: a sixth of the configured sync timeout.
        let timeout_budget = self.sync_config.timeout / 6;
        // Never probe more peers at once than exist, and always at least one
        // (`buffer_unordered` is given a non-zero limit even for an empty slice).
        let concurrency = self
            .sync_config
            .peer_state_probe_concurrency
            .min(peers.len())
            .max(1);

        debug!(
            %context_id,
            peer_count = peers.len(),
            concurrency,
            "Probing peers for state in parallel"
        );

        // Each probe opens a P2P stream, sends one `DagHeadsRequest`, and
        // reads the response. When we find a peer with state and return, the
        // remaining in-flight probes are dropped without sending a close
        // frame; libp2p's idle-timeout handles the cleanup, and the peer may
        // log a write-error if it was mid-response. This is an accepted
        // trade-off — the probe is read-only on the local node, so there is
        // no partial state to unwind, and adding an explicit graceful-close
        // path would require async work in `Drop`, which Rust does not
        // support cleanly.
        let mut probes = stream::iter(peers.iter().copied())
            .map(|peer_id| async move {
                // Inner block so that `?` failures are captured per peer and
                // returned alongside the peer id instead of aborting discovery.
                let outcome = async {
                    let mut stream = self.network_client.open_stream(peer_id).await?;

                    let request_msg = StreamMessage::Init {
                        context_id,
                        party_id: our_identity,
                        payload: InitPayload::DagHeadsRequest { context_id },
                        next_nonce: rand::thread_rng().gen(),
                    };

                    self.send(&mut stream, &request_msg, None).await?;

                    // `None` here means the receive produced no message.
                    let Some(response) =
                        super::stream::recv(&mut stream, None, timeout_budget).await?
                    else {
                        return Ok::<_, eyre::Error>(None);
                    };

                    if let StreamMessage::Message {
                        payload:
                            MessagePayload::DagHeadsResponse {
                                dag_heads,
                                root_hash,
                            },
                        ..
                    } = response
                    {
                        // Peer has state if root_hash is not zeros (dag_heads may
                        // be empty for migrated/legacy contexts).
                        let has_state = *root_hash != [0; 32];
                        let heads_count = dag_heads.len();
                        debug!(
                            %context_id,
                            %peer_id,
                            heads_count,
                            %root_hash,
                            has_state,
                            "Received DAG heads from peer"
                        );
                        Ok(Some((has_state, heads_count, root_hash)))
                    } else {
                        // Any other message type is treated as "no answer".
                        Ok(None)
                    }
                }
                .await;

                (peer_id, outcome)
            })
            .buffer_unordered(concurrency);

        // Results arrive in completion order; return on the first peer with state.
        while let Some((peer_id, outcome)) = probes.next().await {
            match outcome {
                Ok(Some((true, heads_count, root_hash))) => {
                    info!(
                        %context_id,
                        %peer_id,
                        heads_count,
                        %root_hash,
                        "Found peer with state for bootstrapping"
                    );
                    return Ok(peer_id);
                }
                Ok(Some((false, _, _))) => {
                    debug!(%context_id, %peer_id, "peer reported no state");
                }
                Ok(None) => {
                    debug!(%context_id, %peer_id, "peer did not return DAG heads");
                }
                Err(e) => {
                    debug!(%context_id, %peer_id, error = %e, "peer probe failed");
                }
            }
        }

        bail!("No peers with state found for context {}", context_id)
    }

    /// Runs a single sync attempt against `peer_id` and reports it to the
    /// metrics collector.
    ///
    /// Records a sync start before delegating to `initiate_sync_inner`, then
    /// records either a failure (and returns the error unchanged) or a
    /// completion with the elapsed time. On success returns the peer together
    /// with the protocol that was negotiated.
    async fn initiate_sync(
        &self,
        context_id: ContextId,
        peer_id: PeerId,
    ) -> eyre::Result<(PeerId, SyncProtocol)> {
        let started_at = Instant::now();
        let context_label = context_id.to_string();

        info!(%context_id, %peer_id, "Attempting to sync with peer");

        // Every sync attempt funnels through here, so recording
        // start / complete / failure at this point covers all protocol
        // paths. The protocol is not known yet on entry, hence "unknown";
        // the success path below reports the protocol actually chosen.
        self.metrics()
            .record_sync_start(&context_label, "unknown", "interval");

        let protocol = self
            .initiate_sync_inner(context_id, peer_id)
            .await
            .map_err(|err| {
                warn!(
                    %context_id,
                    %peer_id,
                    error = %err,
                    "Sync attempt failed for peer"
                );
                self.metrics().record_sync_failure(
                    &context_label,
                    "unknown",
                    err.to_string().as_str(),
                );
                err
            })?;

        let took = started_at.elapsed();

        info!(%context_id, %peer_id, ?took, ?protocol, "Sync with peer completed successfully");

        // The protocol label is built from the variant-only
        // `SyncProtocolKind` so that it lines up with the fixed
        // `KNOWN_PROTOCOLS` set used by
        // `PrometheusSyncMetrics::sanitize_protocol`. Debug-formatting the
        // data-carrying `SyncProtocol` instead would produce strings such as
        // `HashComparison { root_hash: [...], divergent_subtrees: [...] }`
        // that the sanitiser never recognises, collapsing every sync to
        // `protocol="unknown"` and breaking per-protocol slicing of
        // `sync_successes_total` and `sync_duration_seconds`.
        //
        // `entities_transferred` is not available to the sync manager yet,
        // so 0 is reported. The duration histogram and the success counter
        // are still recorded.
        let protocol_label = format!("{:?}", protocol.kind());
        self.metrics()
            .record_sync_complete(&context_label, &protocol_label, took, 0);

        Ok((peer_id, protocol))
    }

    /// Sends `message` over `stream` by delegating to `super::stream::send`.
    ///
    /// `shared_key` is forwarded unchanged. NOTE(review): presumably `Some`
    /// enables encryption of the frame with the given key/nonce and `None`
    /// sends it in the clear — confirm against `super::stream::send`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `super::stream::send` reports.
    pub(super) async fn send(
        &self,
        stream: &mut Stream,
        message: &StreamMessage<'_>,
        shared_key: Option<(SharedKey, Nonce)>,
    ) -> eyre::Result<()> {
        super::stream::send(stream, message, shared_key).await
    }

    /// Reads the next message from `stream` via `super::stream::recv`.
    ///
    /// The receive budget handed to the stream module is one third of the
    /// configured sync timeout; `shared_key` is passed through untouched.
    pub(super) async fn recv(
        &self,
        stream: &mut Stream,
        shared_key: Option<(SharedKey, Nonce)>,
    ) -> eyre::Result<Option<StreamMessage<'static>>> {
        super::stream::recv(stream, shared_key, self.sync_config.timeout / 3).await
    }

    /// Resolve the bytecode blob id for a context's application.
    ///
    /// When `application` is already known, its bytecode blob id is returned
    /// with `None` (nothing had to be fetched). Otherwise the application is
    /// fetched through the context client and returned alongside its bytecode
    /// blob id so callers can reuse it.
    async fn get_blob_info(
        &self,
        context_id: &ContextId,
        application: &Option<calimero_primitives::application::Application>,
    ) -> eyre::Result<(
        calimero_primitives::blobs::BlobId,
        Option<calimero_primitives::application::Application>,
    )> {
        match application {
            Some(known) => Ok((known.blob.bytecode, None)),
            None => {
                // Application not found - get blob_id from context config
                let fetched = self
                    .context_client
                    .get_context_application(context_id)
                    .await?;
                Ok((fetched.blob.bytecode, Some(fetched)))
            }
        }
    }

    /// Determine the application size, preferring data already in hand.
    ///
    /// Lookup order: `application`, then the cached `app_config_opt`, and
    /// only if both are `None` a fetch through the context client.
    async fn get_application_size(
        &self,
        context_id: &ContextId,
        application: &Option<calimero_primitives::application::Application>,
        app_config_opt: &Option<calimero_primitives::application::Application>,
    ) -> eyre::Result<u64> {
        // `or` keeps the precedence: the application wins over the cached config.
        if let Some(known) = application.as_ref().or(app_config_opt.as_ref()) {
            return Ok(known.size);
        }

        let fetched = self
            .context_client
            .get_context_application(context_id)
            .await?;
        Ok(fetched.size)
    }

    /// Return the application source, cloning it from the cached
    /// `app_config_opt` when present and otherwise fetching the application
    /// through the context client.
    async fn get_application_source(
        &self,
        context_id: &ContextId,
        app_config_opt: &Option<calimero_primitives::application::Application>,
    ) -> eyre::Result<calimero_primitives::application::ApplicationSource> {
        let source = match app_config_opt {
            Some(cached) => cached.source.clone(),
            None => self
                .context_client
                .get_context_application(context_id)
                .await?
                .source
                .clone(),
        };
        Ok(source)
    }

    /// Install the context's application after blob sharing completes.
    ///
    /// Returns `Ok(())` without touching anything when the blob is not
    /// available locally or its bytes cannot be read. Otherwise:
    ///
    /// * a bundle blob is installed via
    ///   `install_application_from_bundle_blob`;
    /// * a non-bundle blob gets an `ApplicationMeta` record written directly
    ///   under `context.application_id`.
    ///
    /// The result is communicated through the out-parameters, not the return
    /// value: on success `*application` is set to the installed application,
    /// and if the installed id differs from `context.application_id` the
    /// context is updated in memory and the change is persisted.
    ///
    /// # Errors
    ///
    /// Fails when a blob lookup, the bundle check task, the source lookup,
    /// the installation, the metadata write, the post-install verification,
    /// or persisting a changed application id fails.
    async fn install_bundle_after_blob_sharing(
        &self,
        context_id: &ContextId,
        blob_id: &calimero_primitives::blobs::BlobId,
        app_config_opt: &Option<calimero_primitives::application::Application>,
        context: &mut calimero_primitives::context::Context,
        application: &mut Option<calimero_primitives::application::Application>,
    ) -> eyre::Result<()> {
        // Only proceed if blob is now available locally
        if !self.node_client.has_blob(blob_id)? {
            return Ok(());
        }

        // Check if blob is a bundle
        let Some(blob_bytes) = self.node_client.get_blob_bytes(blob_id, None).await? else {
            return Ok(());
        };

        // Record the size first: it is the only thing needed from the buffer
        // later, so the buffer itself can be moved into the blocking task
        // instead of being cloned.
        let blob_size = blob_bytes.len() as u64;

        // Wrap blocking I/O in spawn_blocking to avoid blocking async runtime
        let is_bundle =
            tokio::task::spawn_blocking(move || NodeClient::is_bundle_blob(&blob_bytes)).await?;

        // Get source from context config (use cached if available, otherwise fetch)
        let source = self
            .get_application_source(context_id, app_config_opt)
            .await?;

        let installed_app_id = if is_bundle {
            self.node_client
                .install_application_from_bundle_blob(blob_id, &source)
                .await
                .map_err(|e| {
                    eyre::eyre!(
                        "Failed to install bundle application from blob {}: {}",
                        blob_id,
                        e
                    )
                })?
        } else {
            // For non-bundle apps, write ApplicationMeta directly under the
            // known application_id rather than re-deriving it via
            // install_application (which hashes source+metadata and would
            // produce a different ID than the original installer used).
            let mut handle = self.context_client.datastore_handle();
            handle.put(
                &calimero_store::key::ApplicationMeta::new(context.application_id),
                &calimero_store::types::ApplicationMeta::new(
                    calimero_store::key::BlobMeta::new(*blob_id),
                    blob_size,
                    source.to_string().into_boxed_str(),
                    Box::default(),
                    calimero_store::key::BlobMeta::new(calimero_primitives::blobs::BlobId::from(
                        [0u8; 32],
                    )),
                    "unknown".to_owned().into_boxed_str(),
                    "0.0.0".to_owned().into_boxed_str(),
                    String::new().into_boxed_str(),
                ),
            )?;
            context.application_id
        };

        // Verify installation succeeded by fetching the installed application
        let installed_application = self
            .node_client
            .get_application(&installed_app_id)
            .map_err(|e| {
                eyre::eyre!(
                    "Failed to verify bundle installation for application {}: {}",
                    installed_app_id,
                    e
                )
            })?;

        let Some(installed_application) = installed_application else {
            bail!(
                "Bundle installation reported success but application {} is not retrievable",
                installed_app_id
            );
        };

        // Check if the installed ApplicationId matches the context's ApplicationId
        if installed_app_id != context.application_id {
            warn!(
                installed_app_id = %installed_app_id,
                context_app_id = %context.application_id,
                "Installed application ID does not match context application ID, updating to installed ID"
            );
            // Update context with the installed application ID for consistency
            context.application_id = installed_app_id;

            // Persist the ApplicationId change to the database
            // This is critical: if we don't persist, the old ApplicationId will be
            // used on node restart, causing application lookup failures
            self.context_client
                .update_context_application_id(context_id, installed_app_id)
                .map_err(|e| {
                    eyre::eyre!(
                        "Failed to persist ApplicationId update for context {}: {}",
                        context_id,
                        e
                    )
                })?;

            debug!(
                %context_id,
                installed_app_id = %installed_app_id,
                "Persisted ApplicationId update to database"
            );
        }

        // Use the verified installed application
        *application = Some(installed_application);

        Ok(())
    }

    /// Handle DAG synchronization for uninitialized nodes or nodes with incomplete DAGs
    async fn handle_dag_sync(
        &self,
        context_id: ContextId,
        context: &calimero_primitives::context::Context,
        chosen_peer: PeerId,
        our_identity: PublicKey,
        stream: &mut Stream,
    ) -> eyre::Result<Option<SyncProtocol>> {
        let is_uninitialized = *context.root_hash == [0; 32];

        // Check for incomplete sync from a previous run (crash recovery)
        let has_incomplete_sync = self.check_sync_in_progress(context_id)?.is_some();
        if has_incomplete_sync {
            warn!(
                %context_id,
                "Detected incomplete snapshot sync from previous run, forcing re-sync"
            );
        }

        if is_uninitialized || has_incomplete_sync {
            info!(
                %context_id,
                %chosen_peer,
                is_uninitialized,
                has_incomplete_sync,
                "Node needs snapshot sync, checking if peer has state"
            );

            // Query peer's state to decide sync strategy
            let peer_state = self
                .query_peer_dag_state(context_id, chosen_peer, our_identity, stream)
                .await?;

            match peer_state {
                Some((peer_root_hash, _peer_dag_heads)) if *peer_root_hash != [0; 32] => {
                    // Peer has state - use snapshot sync for efficient bootstrap
                    info!(
                        %context_id,
                        %chosen_peer,
                        peer_root_hash = %peer_root_hash,
                        "Peer has state, using snapshot sync for bootstrap"
                    );

                    // Note: request_snapshot_sync opens its own stream, existing stream
                    // will be closed when this function returns
                    // force=false: This is bootstrap for uninitialized nodes
                    match self
                        .request_snapshot_sync(context_id, chosen_peer, false)
                        .await
                        .wrap_err("snapshot sync")
                    {
                        Ok(result) => {
                            info!(
                                %context_id,
                                %chosen_peer,
                                applied_records = result.applied_records,
                                boundary_root_hash = %result.boundary_root_hash,
                                dag_heads_count = result.dag_heads.len(),
                                "Snapshot sync completed successfully"
                            );

                            // CRITICAL: Add snapshot boundary checkpoints to DAG
                            // This ensures that when new deltas arrive referencing the
                            // snapshot boundary heads as parents, the DAG accepts them.
                            if !result.dag_heads.is_empty() {
                                let delta_store = self
                                    .node_state
                                    .delta_stores
                                    .entry(context_id)
                                    .or_insert_with(|| {
                                        crate::delta_store::DeltaStore::new(
                                            [0u8; 32],
                                            self.context_client.clone(),
                                            context_id,
                                            our_identity,
                                        )
                                    })
                                    .clone();

                                let checkpoints_added = delta_store
                                    .add_snapshot_checkpoints(
                                        result.dag_heads.clone(),
                                        *result.boundary_root_hash,
                                    )
                                    .await;

                                info!(
                                    %context_id,
                                    checkpoints_added,
                                    "Added snapshot boundary checkpoints to DAG"
                                );

                                match self.network_client.open_stream(chosen_peer).await {
                                    Ok(mut fine_stream) => {
                                        if let Err(e) = self
                                            .fine_sync_from_boundary(
                                                context_id,
                                                chosen_peer,
                                                our_identity,
                                                &mut fine_stream,
                                            )
                                            .await
                                        {
                                            warn!(
                                                %context_id,
                                                %chosen_peer,
                                                error = %e,
                                                "Fine-sync after snapshot failed, state may be slightly behind"
                                            );
                                        }
                                    }
                                    Err(e) => {
                                        warn!(
                                            %context_id,
                                            %chosen_peer,
                                            error = %e,
                                            "Fine-sync stream open failed, state may be slightly behind"
                                        );
                                    }
                                }
                            }

                            // Replay any buffered deltas (from uninitialized context period)
                            // This ensures handlers execute for deltas that arrived before sync completed
                            if let Some(buffered_deltas) =
                                self.node_state.end_sync_session(&context_id)
                            {
                                let buffered_count = buffered_deltas.len();
                                if buffered_count > 0 {
                                    info!(
                                        %context_id,
                                        buffered_count,
                                        "Replaying buffered deltas after snapshot sync (bootstrap path)"
                                    );
                                    self.replay_buffered_deltas(
                                        context_id,
                                        our_identity,
                                        buffered_deltas,
                                        chosen_peer,
                                    )
                                    .await;
                                }
                            }

                            return Ok(Some(SyncProtocol::Snapshot {
                                compressed: false,
                                verified: true,
                            }));
                        }
                        Err(e) => {
                            warn!(
                                %context_id,
                                %chosen_peer,
                                error = %e,
                                "Snapshot sync failed, will retry with another peer"
                            );
                            bail!("Snapshot sync failed: {}", e);
                        }
                    }
                }
                Some(_) => {
                    // Peer is also uninitialized, try next peer
                    info!(%context_id, %chosen_peer, "Peer also has no state, trying next peer");
                    bail!("Peer has no data for this context");
                }
                None => {
                    // Failed to query peer state
                    bail!("Failed to query peer state for context {}", context_id);
                }
            }
        }

        // Check if we have pending deltas (incomplete DAG)
        // Even if node has some state, it might be missing parent deltas
        if let Some(delta_store) = self.node_state.delta_stores.get(&context_id) {
            // NOTE: previously called `load_persisted_deltas()` here to
            // catch locally-created deltas from execute.rs that are in
            // the DB but not in the in-memory DAG. That rescan was
            // ~21% of CPU (pre #2244) and ~6% after. execute.rs and
            // create_context.rs now notify the node-side drainer via
            // `NodeClient::notify_local_applied_delta`, keeping the
            // DAG current without the per-sync full-column scan.
            let missing_result = delta_store.get_missing_parents().await;

            // Note: Cascaded events from DB loads are handled in state_delta handler
            if !missing_result.cascaded_events.is_empty() {
                info!(
                    %context_id,
                    cascaded_count = missing_result.cascaded_events.len(),
                    "Cascaded deltas from DB load (handlers executed in state_delta path)"
                );
            }

            if !missing_result.missing_ids.is_empty() {
                warn!(
                    %context_id,
                    %chosen_peer,
                    missing_count = missing_result.missing_ids.len(),
                    "Node has incomplete DAG (pending deltas), requesting DAG heads to catch up"
                );

                // Request DAG heads just like uninitialized nodes
                let result = self
                    .request_dag_heads_and_sync(context_id, chosen_peer, our_identity, stream)
                    .await
                    .wrap_err("request DAG heads and sync")?;

                // If peer had no data, return error to try next peer
                if matches!(result, SyncProtocol::None) {
                    bail!("Peer has no data for this context");
                }

                return Ok(Some(result));
            }
        }

        // Compare our state with peer's state even if we think we're in sync.
        // The peer might have new heads we don't know about (e.g., if gossipsub messages were lost).
        let peer_state = self
            .query_peer_dag_state(context_id, chosen_peer, our_identity, stream)
            .await?;

        if let Some((peer_root_hash, peer_dag_heads)) = peer_state {
            // Build handshakes for protocol selection (CIP §2.3)
            // Uses shared functions from calimero_node_primitives::sync::state_machine
            let local_hs = self.build_local_handshake(context);
            let remote_hs = Self::build_remote_handshake(peer_root_hash, &peer_dag_heads);

            // Select optimal sync protocol based on state comparison
            let selection = select_protocol(&local_hs, &remote_hs);

            info!(
                %context_id,
                %chosen_peer,
                protocol = ?selection.protocol,
                reason = %selection.reason,
                local_root = %context.root_hash,
                remote_root = %peer_root_hash,
                local_entities = local_hs.entity_count,
                remote_entities = remote_hs.entity_count,
                "Protocol selected"
            );

            // Dispatch based on selected protocol
            match selection.protocol {
                SyncProtocol::None => {
                    debug!(
                        %context_id,
                        %chosen_peer,
                        root_hash = %context.root_hash,
                        reason = %selection.reason,
                        "No sync needed: {}",
                        selection.reason
                    );
                    return Ok(None);
                }
                SyncProtocol::Snapshot { compressed, .. } => {
                    // Snapshot sync - use existing handler
                    info!(
                        %context_id,
                        %chosen_peer,
                        compressed,
                        reason = %selection.reason,
                        "Initiating snapshot sync"
                    );
                    let result = self
                        .fallback_to_snapshot_sync(context_id, our_identity, chosen_peer)
                        .await
                        .wrap_err("snapshot sync")?;
                    return Ok(Some(result));
                }
                SyncProtocol::DeltaSync { .. } => {
                    // Delta sync - use existing DAG heads request mechanism
                    info!(
                        %context_id,
                        %chosen_peer,
                        reason = %selection.reason,
                        "Initiating delta sync via DAG heads request"
                    );
                    let result = self
                        .request_dag_heads_and_sync(context_id, chosen_peer, our_identity, stream)
                        .await
                        .wrap_err("delta sync")?;

                    if matches!(result, SyncProtocol::None) {
                        bail!("Peer has no data for this context");
                    }

                    return Ok(Some(result));
                }
                SyncProtocol::HashComparison { root_hash, .. } => {
                    // Execute HashComparison sync (CIP §4)
                    info!(
                        %context_id,
                        reason = %selection.reason,
                        "Starting HashComparison sync"
                    );

                    // Wrap stream in transport abstraction
                    let mut transport = super::stream::StreamTransport::new(stream);

                    // Get store for protocol execution
                    let store = self.context_client.datastore_handle().into_inner();
                    let config = HashComparisonConfig {
                        remote_root_hash: root_hash,
                    };

                    match HashComparisonProtocol::run_initiator(
                        &mut transport,
                        &store,
                        context_id,
                        our_identity,
                        config,
                    )
                    .await
                    {
                        Ok(stats) => {
                            info!(
                                %context_id,
                                nodes_compared = stats.nodes_compared,
                                entities_merged = stats.entities_merged,
                                nodes_skipped = stats.nodes_skipped,
                                "HashComparison sync completed successfully"
                            );
                            return Ok(Some(SyncProtocol::HashComparison {
                                root_hash,
                                divergent_subtrees: vec![],
                            }));
                        }
                        Err(e) => {
                            warn!(
                                %context_id,
                                error = %e,
                                "HashComparison sync failed, falling back to DAG catchup"
                            );
                            // Fall back to DAG heads request
                            let result = self
                                .request_dag_heads_and_sync(
                                    context_id,
                                    chosen_peer,
                                    our_identity,
                                    stream,
                                )
                                .await
                                .wrap_err("hash comparison fallback")?;

                            if matches!(result, SyncProtocol::None) {
                                // If DAG catchup doesn't work, try snapshot as last resort
                                info!(
                                    %context_id,
                                    "DAG catchup failed, falling back to snapshot sync"
                                );
                                let result = self
                                    .fallback_to_snapshot_sync(
                                        context_id,
                                        our_identity,
                                        chosen_peer,
                                    )
                                    .await
                                    .wrap_err("snapshot fallback")?;
                                return Ok(Some(result));
                            }

                            return Ok(Some(result));
                        }
                    }
                }
                SyncProtocol::BloomFilter { .. } => {
                    warn!(
                        %context_id,
                        reason = %selection.reason,
                        "BloomFilter not yet implemented, falling back to snapshot"
                    );
                    let result = self
                        .fallback_to_snapshot_sync(context_id, our_identity, chosen_peer)
                        .await
                        .wrap_err("bloom filter fallback")?;
                    return Ok(Some(result));
                }
                SyncProtocol::SubtreePrefetch { .. } => {
                    warn!(
                        %context_id,
                        reason = %selection.reason,
                        "SubtreePrefetch not yet implemented, falling back to snapshot"
                    );
                    let result = self
                        .fallback_to_snapshot_sync(context_id, our_identity, chosen_peer)
                        .await
                        .wrap_err("subtree prefetch fallback")?;
                    return Ok(Some(result));
                }
                SyncProtocol::LevelWise { max_depth } => {
                    // Execute LevelWise sync (CIP Appendix B)
                    info!(
                        %context_id,
                        max_depth,
                        reason = %selection.reason,
                        "Starting LevelWise sync"
                    );

                    // Wrap stream in transport abstraction
                    let mut transport = super::stream::StreamTransport::new(stream);

                    // Get store for protocol execution
                    let store = self.context_client.datastore_handle().into_inner();
                    let config = LevelWiseConfig {
                        remote_root_hash: *peer_root_hash,
                        max_depth,
                    };

                    match LevelWiseProtocol::run_initiator(
                        &mut transport,
                        &store,
                        context_id,
                        our_identity,
                        config,
                    )
                    .await
                    {
                        Ok(stats) => {
                            info!(
                                %context_id,
                                levels_synced = stats.levels_synced,
                                nodes_compared = stats.nodes_compared,
                                entities_merged = stats.entities_merged,
                                nodes_skipped = stats.nodes_skipped,
                                "LevelWise sync completed successfully"
                            );
                            return Ok(Some(SyncProtocol::LevelWise { max_depth }));
                        }
                        Err(e) => {
                            warn!(
                                %context_id,
                                error = %e,
                                "LevelWise sync failed, falling back to DAG catchup"
                            );
                            // Fall back to DAG heads request - open a new stream since the
                            // LevelWise protocol may have left the peer's responder in a state
                            // where it expects LevelWiseRequest messages, not DagHeadsRequest.
                            let mut fallback_stream = self
                                .network_client
                                .open_stream(chosen_peer)
                                .await
                                .wrap_err("open stream for level-wise fallback")?;
                            let result = self
                                .request_dag_heads_and_sync(
                                    context_id,
                                    chosen_peer,
                                    our_identity,
                                    &mut fallback_stream,
                                )
                                .await
                                .wrap_err("level-wise fallback")?;

                            if matches!(result, SyncProtocol::None) {
                                // If DAG catchup doesn't work, try snapshot as last resort
                                info!(
                                    %context_id,
                                    "DAG catchup insufficient, attempting snapshot"
                                );
                                // Drop the consumed fallback_stream before opening fresh streams
                                // in snapshot sync (fallback_stream is in indeterminate state
                                // after DAG sync exchanges)
                                drop(fallback_stream);
                                let snapshot_result = self
                                    .fallback_to_snapshot_sync(
                                        context_id,
                                        our_identity,
                                        chosen_peer,
                                    )
                                    .await
                                    .wrap_err("level-wise snapshot fallback")?;
                                return Ok(Some(snapshot_result));
                            }
                            return Ok(Some(result));
                        }
                    }
                }
            }
        }

        Ok(None)
    }

    /// Asks a peer for its current DAG state with a single request/response exchange.
    ///
    /// Sends one `DagHeadsRequest` for `context_id` over `stream` and reads one reply;
    /// no deltas are requested here.
    ///
    /// # Returns
    /// * `Ok(Some((root_hash, dag_heads)))` when the reply is a `DagHeadsResponse`.
    /// * `Ok(None)` when `recv` yields anything else, including no message at all.
    ///
    /// # Errors
    /// Propagates any error returned by `send` or `recv`.
    async fn query_peer_dag_state(
        &self,
        context_id: ContextId,
        chosen_peer: PeerId,
        our_identity: PublicKey,
        stream: &mut Stream,
    ) -> eyre::Result<Option<(calimero_primitives::hash::Hash, Vec<[u8; DIGEST_SIZE]>)>> {
        let heads_query = StreamMessage::Init {
            context_id,
            party_id: our_identity,
            payload: InitPayload::DagHeadsRequest { context_id },
            next_nonce: rand::thread_rng().gen(),
        };
        self.send(stream, &heads_query, None).await?;

        // Guard clause: every reply shape other than a DAG heads response is treated
        // as "no usable answer" rather than as an error.
        let Some(StreamMessage::Message {
            payload:
                MessagePayload::DagHeadsResponse {
                    dag_heads,
                    root_hash,
                },
            ..
        }) = self.recv(stream, None).await?
        else {
            debug!(%context_id, %chosen_peer, "Failed to get peer DAG state for comparison");
            return Ok(None);
        };

        debug!(
            %context_id,
            %chosen_peer,
            heads_count = dag_heads.len(),
            peer_root_hash = %root_hash,
            "Received peer DAG state for comparison"
        );

        Ok(Some((root_hash, dag_heads)))
    }

    async fn initiate_sync_inner(
        &self,
        context_id: ContextId,
        chosen_peer: PeerId,
    ) -> eyre::Result<SyncProtocol> {
        let sync_start = Instant::now();

        let mut context = self
            .context_client
            .sync_context_config(context_id, None)
            .await?;

        let is_uninitialized = *context.root_hash == [0; 32];
        info!(
            %context_id,
            %chosen_peer,
            is_uninitialized,
            root_hash = %context.root_hash,
            dag_heads_count = context.dag_heads.len(),
            application_id = %context.application_id,
            "Starting sync session"
        );

        // Get application - if not found, we'll try to install it after blob sharing
        let mut application = self.node_client.get_application(&context.application_id)?;

        // Get blob_id and app config for later use
        let (blob_id, app_config_opt) = self.get_blob_info(&context_id, &application).await?;

        let identities = self
            .context_client
            .get_context_members(&context.id, Some(true));

        let Some((our_identity, _)) = choose_stream(identities, &mut rand::thread_rng())
            .await
            .transpose()?
        else {
            bail!("no owned identities found for context: {}", context.id);
        };

        let mut stream = self
            .network_client
            .open_stream(chosen_peer)
            .await
            .wrap_err("open stream for sync")?;

        // Key share phase removed — group key envelopes handle key distribution.
        let key_share_elapsed = std::time::Duration::ZERO;
        debug!(
            %context_id,
            %chosen_peer,
            ?key_share_elapsed,
            "Phase 1/3 complete: key share"
        );

        // Phase 2: Blob share (if needed)
        if !self.node_client.has_blob(&blob_id)? {
            let phase_start = Instant::now();
            // Get size from application config if we don't have application yet
            let size = self
                .get_application_size(&context_id, &application, &app_config_opt)
                .await?;

            self.initiate_blob_share_process(&context, our_identity, blob_id, size, &mut stream)
                .await
                .wrap_err("blob share")?;

            let blob_share_elapsed = phase_start.elapsed();
            debug!(
                %context_id,
                %chosen_peer,
                ?blob_share_elapsed,
                "Phase 2/3 complete: blob share"
            );

            // After blob sharing, try to install application if it doesn't exist
            // or if we only have a stub (size==0 from join_context bootstrap)
            let needs_install =
                application.is_none() || application.as_ref().is_some_and(|app| app.size == 0);
            if needs_install {
                self.install_bundle_after_blob_sharing(
                    &context_id,
                    &blob_id,
                    &app_config_opt,
                    &mut context,
                    &mut application,
                )
                .await
                .wrap_err("install bundle after blob share")?;
            }
        }

        let Some(_application) = application else {
            if context.application_id
                == calimero_primitives::application::ApplicationId::from([0u8; 32])
            {
                bail!("context has placeholder application ID — waiting for governance op to resolve it");
            }
            bail!("application not found: {}", context.application_id);
        };

        // Phase 3: DAG synchronization (if needed — uninitialized or incomplete DAG)
        let phase_start = Instant::now();
        if let Some(result) = self
            .handle_dag_sync(context_id, &context, chosen_peer, our_identity, &mut stream)
            .await
            .wrap_err("DAG sync")?
        {
            let dag_sync_elapsed = phase_start.elapsed();
            let total_elapsed = sync_start.elapsed();
            info!(
                %context_id,
                %chosen_peer,
                ?key_share_elapsed,
                ?dag_sync_elapsed,
                ?total_elapsed,
                protocol = ?result,
                "Sync session complete (DAG sync performed)"
            );
            return Ok(result);
        }

        let total_elapsed = sync_start.elapsed();
        // Otherwise, DAG-based sync happens automatically via BroadcastMessage::StateDelta
        debug!(
            %context_id,
            %chosen_peer,
            ?key_share_elapsed,
            ?total_elapsed,
            "Sync session complete: node is in sync, no active protocol needed"
        );
        Ok(SyncProtocol::None)
    }

    /// Request peer's DAG heads and sync all missing deltas
    async fn request_dag_heads_and_sync(
        &self,
        context_id: ContextId,
        peer_id: PeerId,
        our_identity: PublicKey,
        stream: &mut Stream,
    ) -> eyre::Result<SyncProtocol> {
        use calimero_node_primitives::sync::{InitPayload, MessagePayload, StreamMessage};

        // Send DAG heads request
        let request_msg = StreamMessage::Init {
            context_id,
            party_id: our_identity,
            payload: InitPayload::DagHeadsRequest { context_id },
            next_nonce: {
                use rand::Rng;
                rand::thread_rng().gen()
            },
        };

        self.send(stream, &request_msg, None).await?;

        // Receive response
        let response = self.recv(stream, None).await?;

        match response {
            Some(StreamMessage::Message {
                payload:
                    MessagePayload::DagHeadsResponse {
                        dag_heads,
                        root_hash,
                    },
                ..
            }) => {
                info!(
                    %context_id,
                    heads_count = dag_heads.len(),
                    peer_root_hash = %root_hash,
                    "Received DAG heads from peer, requesting deltas"
                );

                // Check if peer has state even without DAG heads
                if dag_heads.is_empty() && *root_hash != [0; 32] {
                    error!(
                        %context_id,
                        peer_root_hash = %root_hash,
                        "Peer has state but no DAG heads!"
                    );
                    bail!(
                        "Peer has state but no DAG heads (migration issue). \
                         Clear data directories on both nodes and recreate context."
                    );
                }

                if dag_heads.is_empty() {
                    info!(%context_id, "Peer also has no deltas and no state, will try next peer");
                    // Return None to signal caller to try next peer
                    return Ok(SyncProtocol::None);
                }

                // CRITICAL FIX: Fetch ALL DAG heads first, THEN request missing parents
                // This ensures we don't miss sibling heads that might be the missing parents

                // Get or create DeltaStore for this context (do this once before the loop)
                let (delta_store_ref, is_new) = {
                    let mut is_new = false;
                    let delta_store = self
                        .node_state
                        .delta_stores
                        .entry(context_id)
                        .or_insert_with(|| {
                            is_new = true;
                            crate::delta_store::DeltaStore::new(
                                [0u8; 32],
                                self.context_client.clone(),
                                context_id,
                                our_identity,
                            )
                        });

                    (delta_store.clone(), is_new)
                };

                // The previous revision ran `load_persisted_deltas`
                // unconditionally here on every sync — the rescan
                // dominated the hot path. execute.rs now notifies the
                // node-side drainer directly, so warm stores don't
                // need rehydration. But when *this* path is the first
                // to create the DeltaStore for a context (fresh boot,
                // sync arrives before the first local execute), the
                // in-memory DAG is empty and we still need a one-time
                // load so `get_delta` can serve peers and missing-
                // parent queries have the right picture.
                if is_new {
                    if let Err(e) = delta_store_ref.load_persisted_deltas().await {
                        warn!(
                            ?e,
                            %context_id,
                            "Failed to hydrate freshly-created DeltaStore from DB"
                        );
                    }
                }

                // Phase 1: Request and add ALL DAG heads
                for head_id in &dag_heads {
                    info!(
                        %context_id,
                        head_id = ?head_id,
                        "Requesting DAG head delta from peer"
                    );

                    let delta_request = StreamMessage::Init {
                        context_id,
                        party_id: our_identity,
                        payload: InitPayload::DeltaRequest {
                            context_id,
                            delta_id: *head_id,
                        },
                        next_nonce: {
                            use rand::Rng;
                            rand::thread_rng().gen()
                        },
                    };

                    self.send(stream, &delta_request, None).await?;

                    let delta_response = self.recv(stream, None).await?;

                    match delta_response {
                        Some(StreamMessage::Message {
                            payload: MessagePayload::DeltaResponse { delta },
                            ..
                        }) => {
                            // Deserialize and add to DAG
                            let storage_delta: calimero_storage::delta::CausalDelta =
                                borsh::from_slice(&delta)?;

                            let dag_delta = calimero_dag::CausalDelta {
                                id: storage_delta.id,
                                parents: storage_delta.parents,
                                payload: storage_delta.actions,
                                hlc: storage_delta.hlc,
                                expected_root_hash: storage_delta.expected_root_hash,
                                kind: calimero_dag::DeltaKind::Regular,
                            };

                            if let Err(e) = delta_store_ref.add_delta(dag_delta).await {
                                warn!(
                                    ?e,
                                    %context_id,
                                    head_id = ?head_id,
                                    "Failed to add DAG head delta"
                                );
                            } else {
                                info!(
                                    %context_id,
                                    head_id = ?head_id,
                                    "Successfully added DAG head delta"
                                );
                            }
                        }
                        Some(StreamMessage::Message {
                            payload:
                                MessagePayload::SnapshotError {
                                    error:
                                        calimero_node_primitives::sync::SnapshotError::SnapshotRequired,
                                },
                            ..
                        }) => {
                            info!(
                                %context_id,
                                head_id = ?head_id,
                                "Peer's delta history is pruned, falling back to snapshot sync"
                            );
                            // Fall back to snapshot sync
                            return self
                                .fallback_to_snapshot_sync(context_id, our_identity, peer_id)
                                .await;
                        }
                        Some(StreamMessage::Message {
                            payload: MessagePayload::DeltaNotFound,
                            ..
                        }) => {
                            warn!(
                                %context_id,
                                head_id = ?head_id,
                                "Peer doesn't have requested DAG head delta"
                            );
                            // Continue trying other heads
                        }
                        _ => {
                            warn!(%context_id, head_id = ?head_id, "Unexpected response to delta request");
                        }
                    }
                }

                // Phase 2: Now check for missing parents and fetch them recursively
                let missing_result = delta_store_ref.get_missing_parents().await;

                // Note: Cascaded events from DB loads logged but not executed here (state_delta handler will catch them)
                if !missing_result.cascaded_events.is_empty() {
                    info!(
                        %context_id,
                        cascaded_count = missing_result.cascaded_events.len(),
                        "Cascaded deltas from DB load during DAG head sync"
                    );
                }

                // Steady-state: the initial DAG-heads response matched local
                // state, so there are no missing parents to chase. Skip the
                // entire retry-and-final-check machinery on the common path.
                if missing_result.missing_ids.is_empty() {
                    return Ok(SyncProtocol::DeltaSync {
                        missing_delta_ids: vec![],
                    });
                }

                info!(
                    %context_id,
                    missing_count = missing_result.missing_ids.len(),
                    "DAG heads have missing parents, requesting them recursively"
                );

                // First attempt: the peer that served DAG heads.
                if let Err(e) = self
                    .request_missing_deltas(
                        context_id,
                        missing_result.missing_ids,
                        peer_id,
                        delta_store_ref.clone(),
                        our_identity,
                    )
                    .await
                {
                    warn!(
                        ?e,
                        %context_id,
                        "Failed to request missing parent deltas from initial peer"
                    );
                }

                // Cross-peer fallback for cold-start race (#2198): if the
                // initial peer did not resolve every missing parent, iterate
                // other mesh peers for this context until the DAG is whole
                // or the retry budget is exhausted.
                let topic = TopicHash::from_raw(context_id);
                let mut budget = super::parent_pull::ParentPullBudget::new(
                    peer_id,
                    self.sync_config.parent_pull_additional_peers,
                    self.sync_config.parent_pull_budget,
                );
                let mut mesh_peers = self.network_client.mesh_peers(topic.clone()).await;

                loop {
                    let after = delta_store_ref.get_missing_parents().await;
                    if after.missing_ids.is_empty() {
                        break; // fully resolved
                    }

                    let next_peer = match budget.next(&mesh_peers) {
                        super::parent_pull::NextPeer::Peer(p) => p,
                        super::parent_pull::NextPeer::RefetchMesh => {
                            mesh_peers = self.network_client.mesh_peers(topic.clone()).await;
                            budget.record_refetch();
                            match budget.next(&mesh_peers) {
                                super::parent_pull::NextPeer::Peer(p) => p,
                                other => {
                                    debug!(
                                        %context_id,
                                        ?other,
                                        "no additional mesh peers available for parent pull"
                                    );
                                    break;
                                }
                            }
                        }
                        super::parent_pull::NextPeer::BudgetExhausted => {
                            warn!(
                                %context_id,
                                "parent-pull budget exhausted"
                            );
                            break;
                        }
                        super::parent_pull::NextPeer::MaxPeersReached
                        | super::parent_pull::NextPeer::NoMorePeers => break,
                    };

                    budget.record_attempt(next_peer);

                    info!(
                        %context_id,
                        ?next_peer,
                        attempt = budget.attempts(),
                        still_missing = after.missing_ids.len(),
                        "retrying missing-parent fetch against additional mesh peer"
                    );

                    if let Err(e) = self
                        .request_missing_deltas(
                            context_id,
                            after.missing_ids,
                            next_peer,
                            delta_store_ref.clone(),
                            our_identity,
                        )
                        .await
                    {
                        warn!(
                            ?e,
                            %context_id,
                            ?next_peer,
                            "cross-peer parent-pull attempt failed"
                        );
                    }
                }

                // Final check: if pending parents still remain, the sync did
                // NOT fully restore the DAG. Return an error so the caller
                // (e.g. join_context) surfaces a real failure instead of
                // silent success on a partially-applied DAG.
                let final_missing = delta_store_ref.get_missing_parents().await;
                if !final_missing.missing_ids.is_empty() {
                    warn!(
                        %context_id,
                        remaining = final_missing.missing_ids.len(),
                        peer_attempts = budget.total_attempts(),
                        "DAG sync ended with unresolved missing parents"
                    );
                    bail!(
                        "pending parents unresolved for context {}: {} remaining after {} peer attempt(s)",
                        context_id,
                        final_missing.missing_ids.len(),
                        budget.total_attempts(),
                    );
                }

                // Success: DAG is fully resolved.
                Ok(SyncProtocol::DeltaSync {
                    missing_delta_ids: vec![],
                })
            }
            _ => {
                warn!(%context_id, "Unexpected response to DAG heads request, trying next peer");
                Ok(SyncProtocol::None)
            }
        }
    }

    /// Fall back to full snapshot sync when delta sync is not possible.
    ///
    /// Implements Invariant I6: Deltas received during sync are buffered and
    /// replayed after sync completes. On error, buffered deltas are discarded
    /// via `cancel_sync_session()`.
    ///
    /// Sequence:
    /// 1. Open a sync session on `node_state` so incoming deltas get buffered.
    /// 2. Request the snapshot from `peer_id` with `force = false`.
    /// 3. Close the session and replay whatever was buffered meanwhile.
    /// 4. If the snapshot result carries DAG heads, run a best-effort
    ///    fine-sync against the same peer.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `request_snapshot_sync`; in that case the
    /// sync session is cancelled first. Failures in the fine-sync step
    /// (opening the stream or the fine-sync exchange itself) are logged and do
    /// not fail the call, because the snapshot has already been applied.
    async fn fallback_to_snapshot_sync(
        &self,
        context_id: ContextId,
        our_identity: PublicKey,
        peer_id: PeerId,
    ) -> eyre::Result<SyncProtocol> {
        info!(%context_id, %peer_id, "Initiating snapshot sync");

        // Start buffering deltas that arrive during snapshot sync (Invariant I6).
        // The current wall-clock time (nanoseconds since the Unix epoch) is used
        // as the sync start HLC; a clock set before the epoch degrades to 0.
        let sync_start_hlc = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_nanos() as u64)
            .unwrap_or(0);
        self.node_state
            .start_sync_session(context_id, sync_start_hlc);

        // force=false: Enforce Invariant I5 - only allow snapshot on fresh nodes.
        // If the node has state, this will fail, which is correct - divergence
        // or pruned history on initialized nodes cannot be safely resolved via
        // snapshot overwrite. CRDT merge must be used instead.
        let result = match self.request_snapshot_sync(context_id, peer_id, false).await {
            Ok(r) => r,
            Err(e) => {
                // Cancel sync session on failure - discard buffered deltas
                // since the context state is inconsistent
                self.node_state.cancel_sync_session(&context_id);
                return Err(e);
            }
        };
        info!(%context_id, records = result.applied_records, "Snapshot sync completed");

        // End buffering and get any deltas that arrived during sync
        let buffered_deltas = self.node_state.end_sync_session(&context_id);
        let buffered_count = buffered_deltas.as_ref().map_or(0, Vec::len);

        if buffered_count > 0 {
            info!(
                %context_id,
                buffered_count,
                "Replaying buffered deltas after snapshot sync"
            );

            // Replay buffered deltas - now that context is initialized, we can process them
            if let Some(deltas) = buffered_deltas {
                self.replay_buffered_deltas(context_id, our_identity, deltas, peer_id)
                    .await;
            }
        }

        // Fine-sync to catch any deltas since the snapshot boundary.
        // This step is best-effort: the snapshot is already applied, so neither
        // a failure to open the stream nor a failed exchange may turn the
        // overall result into an error.
        if !result.dag_heads.is_empty() {
            match self.network_client.open_stream(peer_id).await {
                Ok(mut stream) => {
                    if let Err(e) = self
                        .fine_sync_from_boundary(context_id, peer_id, our_identity, &mut stream)
                        .await
                    {
                        warn!(?e, %context_id, "Fine-sync failed, state may be slightly behind");
                    }
                }
                Err(e) => {
                    warn!(
                        ?e,
                        %context_id,
                        %peer_id,
                        "Failed to open stream for fine-sync, state may be slightly behind"
                    );
                }
            }
        }

        Ok(SyncProtocol::Snapshot {
            compressed: false,
            verified: true,
        })
    }

    /// Replay buffered deltas after snapshot sync completes.
    ///
    /// This ensures that:
    /// 1. Deltas arriving during sync aren't lost
    /// 2. Event handlers execute for buffered deltas
    /// 3. Ancestor deltas (whose state is covered by checkpoint) get handlers executed
    ///
    /// The deltas are first sorted by `(hlc, id)`, then each one is classified
    /// as "covered" (already applied in the delta store, or a buffered ancestor
    /// of such a delta) or not, and finally every delta is handed to
    /// `replay_buffered_delta` together with that flag.
    ///
    /// Individual replay failures are logged and do not stop the remaining
    /// deltas from being replayed. `_fallback_peer` is currently unused.
    async fn replay_buffered_deltas(
        &self,
        context_id: ContextId,
        our_identity: PublicKey,
        mut deltas: Vec<calimero_node_primitives::delta_buffer::BufferedDelta>,
        _fallback_peer: PeerId,
    ) {
        use crate::handlers::state_delta::{replay_buffered_delta, ReplayBufferedDeltaInput};
        use std::collections::{HashMap, HashSet, VecDeque};

        // #2319 determinism: deltas land in the buffer in gossipsub
        // arrival order, which differs node-to-node — replaying them in
        // that order makes two nodes apply *concurrent* deltas to storage
        // in different sequences, which (for any merge that isn't
        // perfectly order-independent) yields a different Merkle root for
        // the same delta set. Replay in a canonical, causally-consistent
        // order — HLC, then delta id as a tiebreaker — so every node
        // applies the same sequence. (The DAG cascade still re-orders for
        // genuine causal dependencies; this only pins the order of
        // concurrent ones.)
        deltas.sort_by(|a, b| a.hlc.cmp(&b.hlc).then_with(|| a.id.cmp(&b.id)));

        // Get the delta store to check for existing checkpoints.
        // If this path is the first to create the DeltaStore, hydrate
        // from DB once — incremental updates via execute.rs handle the
        // warm-store case, but a fresh store here would otherwise miss
        // everything on disk and we'd later fail to match checkpoints.
        //
        // The map entry is confined to this block so only a clone of the
        // store outlives it and nothing from the map is held across the
        // `.await` points below.
        let (delta_store, is_new) = {
            let mut is_new = false;
            let entry = self
                .node_state
                .delta_stores
                .entry(context_id)
                .or_insert_with(|| {
                    is_new = true;
                    crate::delta_store::DeltaStore::new(
                        [0u8; 32],
                        self.context_client.clone(),
                        context_id,
                        our_identity,
                    )
                });
            (entry.clone(), is_new)
        };
        if is_new {
            if let Err(e) = delta_store.load_persisted_deltas().await {
                warn!(
                    ?e,
                    %context_id,
                    "Failed to hydrate freshly-created DeltaStore from DB"
                );
            }
        }

        // Set of IDs that are "covered" by the snapshot. This includes:
        // 1. Deltas that match checkpoints directly
        // 2. Deltas that are ancestors of checkpoints (their state is included in snapshot)
        let mut covered_delta_ids: HashSet<[u8; 32]> = HashSet::new();

        // Identify which buffered deltas match existing checkpoints, i.e. the
        // delta store reports them as already applied.
        let mut checkpoint_matches: Vec<[u8; 32]> = Vec::new();
        for buffered in &deltas {
            if delta_store.dag_has_delta_applied(&buffered.id).await {
                checkpoint_matches.push(buffered.id);
                covered_delta_ids.insert(buffered.id);
            }
        }

        // Propagate "covered" status backwards through the parent chain
        // If delta D has a child C that is covered, then D is also covered
        // (D's state is included in C's checkpoint)
        let delta_ids: HashSet<[u8; 32]> = deltas.iter().map(|d| d.id).collect();
        let delta_parents: HashMap<[u8; 32], Vec<[u8; 32]>> =
            deltas.iter().map(|d| (d.id, d.parents.clone())).collect();

        // BFS backwards from checkpoint matches
        let mut queue: VecDeque<[u8; 32]> = checkpoint_matches.iter().copied().collect();
        while let Some(child_id) = queue.pop_front() {
            // Only buffered deltas have a known parent list here.
            let Some(parents) = delta_parents.get(&child_id) else {
                continue;
            };
            for parent_id in parents {
                // Only buffered parents are tracked; `insert` returns `true`
                // exactly when the parent was not yet covered, so each delta
                // is enqueued at most once.
                if delta_ids.contains(parent_id) && covered_delta_ids.insert(*parent_id) {
                    queue.push_back(*parent_id);
                }
            }
        }

        if !covered_delta_ids.is_empty() {
            info!(
                %context_id,
                covered_count = covered_delta_ids.len(),
                checkpoint_matches = checkpoint_matches.len(),
                total_buffered = deltas.len(),
                "Identified buffered deltas covered by snapshot checkpoint"
            );
        }

        for buffered in deltas {
            // Capture what the log lines need before `buffered` is moved
            // into the replay input.
            let delta_id = buffered.id;
            let has_events = buffered.events.is_some();
            let is_covered_by_checkpoint = covered_delta_ids.contains(&delta_id);

            match replay_buffered_delta(ReplayBufferedDeltaInput {
                context_client: self.context_client.clone(),
                node_client: self.node_client.clone(),
                node_state: self.node_state.clone(),
                context_id,
                our_identity,
                buffered,
                sync_timeout: self.sync_config.timeout,
                is_covered_by_checkpoint,
            })
            .await
            {
                Ok(applied) => {
                    if applied {
                        info!(
                            %context_id,
                            delta_id = ?delta_id,
                            has_events,
                            "Replayed buffered delta successfully"
                        );
                    } else if is_covered_by_checkpoint {
                        debug!(
                            %context_id,
                            delta_id = ?delta_id,
                            "Buffered delta is ancestor of checkpoint (state covered, handlers executed)"
                        );
                    } else {
                        debug!(
                            %context_id,
                            delta_id = ?delta_id,
                            "Buffered delta went to pending (missing parents)"
                        );
                    }
                }
                Err(e) => {
                    // Best-effort: keep replaying the remaining deltas.
                    warn!(
                        %context_id,
                        delta_id = ?delta_id,
                        error = %e,
                        "Failed to replay buffered delta"
                    );
                }
            }
        }
    }

    /// Fine-sync from snapshot boundary to catch up to latest state.
    ///
    /// Sends a `DagHeadsRequest` for `context_id` over `stream`, compares the
    /// heads in the peer's `DagHeadsResponse` against the local delta store,
    /// and requests every head the store does not have from `peer_id` via
    /// `request_missing_deltas`.
    ///
    /// # Errors
    ///
    /// Propagates failures from sending the request, receiving the response,
    /// and from `request_missing_deltas`. A response that is not a
    /// `DagHeadsResponse` (or a stream that ends without one) is logged as a
    /// warning and treated as "nothing to fetch", returning `Ok(())`.
    async fn fine_sync_from_boundary(
        &self,
        context_id: ContextId,
        peer_id: PeerId,
        our_identity: PublicKey,
        stream: &mut Stream,
    ) -> eyre::Result<()> {
        // Fresh DeltaStore created here must be hydrated once from DB;
        // warm stores are kept current by execute-side incremental
        // notifications.
        //
        // The map entry is confined to this block so only a clone of the
        // store outlives it and nothing from the map is held across the
        // `.await` points below.
        let (delta_store, is_new) = {
            let mut is_new = false;
            let entry = self
                .node_state
                .delta_stores
                .entry(context_id)
                .or_insert_with(|| {
                    is_new = true;
                    crate::delta_store::DeltaStore::new(
                        [0u8; 32],
                        self.context_client.clone(),
                        context_id,
                        our_identity,
                    )
                });
            (entry.clone(), is_new)
        };
        if is_new {
            if let Err(e) = delta_store.load_persisted_deltas().await {
                warn!(
                    ?e,
                    %context_id,
                    "Failed to hydrate freshly-created DeltaStore from DB"
                );
            }
        }

        let request_msg = StreamMessage::Init {
            context_id,
            party_id: our_identity,
            payload: InitPayload::DagHeadsRequest { context_id },
            next_nonce: rand::random(),
        };
        self.send(stream, &request_msg, None).await?;

        match self.recv(stream, None).await? {
            Some(StreamMessage::Message {
                payload: MessagePayload::DagHeadsResponse { dag_heads, .. },
                ..
            }) => {
                // Collect the peer's heads that the local store does not know yet.
                let mut missing = Vec::new();
                for head in &dag_heads {
                    if !delta_store.has_delta(head).await {
                        missing.push(*head);
                    }
                }

                if !missing.is_empty() {
                    self.request_missing_deltas(
                        context_id,
                        missing,
                        peer_id,
                        delta_store,
                        our_identity,
                    )
                    .await?;
                }
            }
            Some(_) => {
                // Not an error for the caller, but worth surfacing: no
                // catch-up happened even though the call returns `Ok`.
                warn!(
                    %context_id,
                    %peer_id,
                    "Unexpected response to DAG heads request during fine-sync, skipping"
                );
            }
            None => {
                warn!(
                    %context_id,
                    %peer_id,
                    "No response to DAG heads request during fine-sync, skipping"
                );
            }
        }

        Ok(())
    }

    /// Serve an inbound stream opened by `peer_id` until it is finished.
    ///
    /// Repeatedly delegates to `internal_handle_opened_stream`:
    /// - `Ok(Some(()))` — one request was handled; keep serving the stream.
    /// - `Ok(None)` — the handler signalled that the stream is done; stop.
    /// - `Err(_)` — the failure is logged and the peer is notified with
    ///   `StreamMessage::OpaqueError`; serving then continues so the peer can
    ///   issue further requests.
    ///
    /// If the `OpaqueError` notification itself cannot be sent, the stream is
    /// considered unusable and the loop ends instead of retrying on a stream
    /// that has just been found unwritable.
    pub async fn handle_opened_stream(&self, peer_id: PeerId, mut stream: Box<Stream>) {
        loop {
            match self
                .internal_handle_opened_stream(peer_id, &mut stream)
                .await
            {
                Ok(None) => break,
                Ok(Some(())) => {}
                Err(err) => {
                    error!(%err, "Failed to handle stream message");

                    if let Err(err) = self
                        .send(&mut stream, &StreamMessage::OpaqueError, None)
                        .await
                    {
                        error!(%err, "Failed to send error message");
                        // We cannot write to this stream any more, so no later
                        // request on it could be answered either. Stop here
                        // rather than looping on a dead stream.
                        break;
                    }
                }
            }
        }
    }

    async fn internal_handle_opened_stream(
        &self,
        peer_id: PeerId,
        stream: &mut Stream,
    ) -> eyre::Result<Option<()>> {
        let Some(message) = self.recv(stream, None).await? else {
            return Ok(None);
        };

        let (context_id, their_identity, payload, nonce) = match message {
            StreamMessage::Init {
                context_id,
                party_id,
                payload,
                next_nonce,
                ..
            } => (context_id, party_id, payload, next_nonce),
            unexpected @ (StreamMessage::Message { .. } | StreamMessage::OpaqueError) => {
                bail!("expected initialization handshake, got {:?}", unexpected)
            }
        };

        if let InitPayload::NamespaceBackfillRequest {
            namespace_id,
            delta_ids,
        } = &payload
        {
            self.handle_namespace_backfill_request(*namespace_id, delta_ids, stream, nonce)
                .await?;
            return Ok(Some(()));
        }

        if let InitPayload::NamespaceJoinRequest {
            namespace_id,
            ref invitation_bytes,
            joiner_public_key,
        } = &payload
        {
            self.handle_namespace_join_request(
                *namespace_id,
                invitation_bytes,
                *joiner_public_key,
                stream,
                nonce,
            )
            .await?;
            return Ok(Some(()));
        }

        if let InitPayload::OpenSubgroupJoinRequest {
            namespace_id,
            subgroup_id,
            joiner_public_key,
        } = &payload
        {
            self.handle_open_subgroup_join_request(
                *namespace_id,
                *subgroup_id,
                *joiner_public_key,
                stream,
                nonce,
            )
            .await?;
            return Ok(Some(()));
        }

        let context = match self.context_client.get_context(&context_id)? {
            Some(ctx) => ctx,
            None => {
                // Race window: the dialer can trigger context-level sync as
                // a cascade of namespace-topic subscription
                // (`subscriptions.rs::handle_subscribed` → `sync_group` /
                // `broadcast_group_local_state`) before this node's local
                // `join_context` materialises the context entry. If the
                // dialer is a member of the namespace this context belongs
                // to, the inbound stream is legitimate — just early. Brief
                // wait for materialisation, then proceed (or close if it
                // really is an unknown context). Same race shape as the
                // unknown-member catch-up below (#2237).
                let store = self.context_client.datastore();
                let dialer_is_namespace_member =
                    match calimero_context::group_store::get_group_for_context(store, &context_id)?
                    {
                        Some(group_id) => calimero_context::group_store::check_group_membership(
                            store,
                            &group_id,
                            &their_identity,
                        )?,
                        None => false,
                    };

                if !dialer_is_namespace_member {
                    // Genuinely unknown context (or cross-namespace stream
                    // leak per #2198). Close cleanly so unrelated sync
                    // activity is unaffected.
                    warn!(
                        %context_id,
                        ?their_identity,
                        "inbound stream for unknown context, closing cleanly"
                    );

                    if let Err(err) = self.send(stream, &StreamMessage::OpaqueError, None).await {
                        error!(%err, %context_id, "failed to send OpaqueError for unknown context");
                    }

                    return Ok(None);
                }

                // Bounded wait for local `join_context` to materialise the
                // context entry. Poll cadence matches `FALLBACK_POLL` in
                // `handlers/join_context.rs`. The 5 s budget comfortably
                // covers the ~5 s gap observed between namespace-membership
                // application and local context materialisation in the
                // `bdc61af` smoke-regression artefact; cold-start cases
                // beyond that fall through to the same OpaqueError as
                // before — the dialer retries on its next sync interval.
                const MATERIALIZATION_WINDOW: time::Duration = time::Duration::from_secs(5);
                const MATERIALIZATION_POLL: time::Duration = time::Duration::from_millis(200);

                let deadline = Instant::now() + MATERIALIZATION_WINDOW;
                let mut materialised = None;
                while Instant::now() < deadline {
                    time::sleep(MATERIALIZATION_POLL).await;
                    if let Some(ctx) = self.context_client.get_context(&context_id)? {
                        materialised = Some(ctx);
                        break;
                    }
                }

                match materialised {
                    Some(ctx) => {
                        debug!(
                            %context_id,
                            ?their_identity,
                            "context materialised during join race window, proceeding with inbound sync"
                        );
                        ctx
                    }
                    None => {
                        debug!(
                            %context_id,
                            ?their_identity,
                            "context not materialised within join race window, closing stream"
                        );
                        if let Err(err) = self.send(stream, &StreamMessage::OpaqueError, None).await
                        {
                            error!(
                                %err,
                                %context_id,
                                "failed to send OpaqueError for unknown context"
                            );
                        }
                        return Ok(None);
                    }
                }
            }
        };

        let mut _updated = None;

        // Issue #2256: also accept inheritance-eligible parent members
        // for sync auth. `has_member` only knows direct context-membership
        // and direct group-membership; the parent-walk for `Open` subgroups
        // lives in `calimero-context::group_store`, which we have access
        // to here at the node layer.
        let is_inherited_member = || -> eyre::Result<bool> {
            let store = self.context_client.datastore();
            let Some(group_id) =
                calimero_context::group_store::get_group_for_context(store, &context_id)?
            else {
                return Ok(false);
            };
            calimero_context::group_store::check_group_membership(store, &group_id, &their_identity)
        };

        if !self
            .context_client
            .has_member(&context_id, &their_identity)?
            && !is_inherited_member()?
        {
            _updated = Some(
                self.context_client
                    .sync_context_config(context_id, None)
                    .await?,
            );

            if !self
                .context_client
                .has_member(&context_id, &their_identity)?
                && !is_inherited_member()?
            {
                // The peer may have just published MemberAdded for themselves
                // (or their side of the governance DAG is ahead of ours) and
                // gossipsub hasn't delivered it yet. Instead of waiting and
                // hoping the gossip arrives, ask this peer directly for the
                // current namespace governance state on a separate stream —
                // it's the fastest path out of the "unknown member" state and
                // avoids a 30 s stall waiting for `NamespaceStateHeartbeat`.
                //
                // Fire-and-forget governance propagation (issue #2237) is the
                // underlying bug; this is a narrower mitigation in the
                // responder path that converts the terminal close into an
                // active catch-up request.
                self.request_governance_catchup_from_peer(peer_id, &context_id, &their_identity)
                    .await;

                if !self
                    .context_client
                    .has_member(&context_id, &their_identity)?
                    && !is_inherited_member()?
                {
                    // Catch-up didn't resolve it (peer returned nothing, peer
                    // also doesn't know, or the op chain isn't valid locally).
                    // Close gracefully — the initiator retries on their next
                    // sync interval. Demoted from warn to debug because this
                    // is expected during mesh formation and would otherwise
                    // spam logs on every cold join.
                    debug!(
                        %context_id,
                        %their_identity,
                        "unknown context member after namespace backfill request, closing stream"
                    );
                    return Ok(Some(()));
                }
            }
        }

        // Note: Concurrent syncs are already prevented by SyncState tracking
        // in the start() loop. When sync starts, last_sync is set to None.
        // When complete, it's set to Some(now).

        let identities = self
            .context_client
            .get_context_members(&context.id, Some(true));

        let Some((our_identity, _)) = choose_stream(identities, &mut rand::thread_rng())
            .await
            .transpose()?
        else {
            bail!("no owned identities found for context: {}", context.id);
        };

        match payload {
            InitPayload::BlobShare { blob_id } => {
                self.handle_blob_share_request(
                    &context,
                    our_identity,
                    their_identity,
                    blob_id,
                    stream,
                )
                .await?
            }
            // Old sync protocols removed - DAG uses gossipsub broadcast instead
            // Streams are only used for: KeyShare, BlobShare, DeltaRequest, DagHeadsRequest
            InitPayload::DeltaRequest {
                context_id: requested_context_id,
                delta_id,
            } => {
                // Handle delta request from peer
                self.handle_delta_request(requested_context_id, delta_id, stream)
                    .await?
            }
            InitPayload::DagHeadsRequest {
                context_id: requested_context_id,
            } => {
                // Handle DAG heads request from peer
                self.handle_dag_heads_request(requested_context_id, stream, nonce)
                    .await?
            }
            InitPayload::SnapshotBoundaryRequest {
                context_id: requested_context_id,
                requested_cutoff_timestamp,
            } => {
                // Handle snapshot boundary negotiation request from peer
                self.handle_snapshot_boundary_request(
                    requested_context_id,
                    requested_cutoff_timestamp,
                    stream,
                    nonce,
                )
                .await?
            }
            InitPayload::SnapshotStreamRequest {
                context_id: requested_context_id,
                boundary_root_hash,
                page_limit,
                byte_limit,
                resume_cursor,
            } => {
                // Handle snapshot stream request from peer
                self.handle_snapshot_stream_request(
                    requested_context_id,
                    boundary_root_hash,
                    page_limit,
                    byte_limit,
                    resume_cursor,
                    stream,
                    nonce,
                )
                .await?
            }
            InitPayload::TreeNodeRequest {
                context_id: requested_context_id,
                node_id,
                max_depth,
            } => {
                // Handle tree node request from peer (HashComparison sync)
                // Wrap stream in transport abstraction
                let mut transport = super::stream::StreamTransport::new(stream);
                self.handle_tree_node_request(
                    requested_context_id,
                    node_id,
                    max_depth,
                    &mut transport,
                    nonce,
                )
                .await?
            }
            InitPayload::LevelWiseRequest {
                context_id: requested_context_id,
                level: first_level,
                parent_ids: first_parent_ids,
            } => {
                // Handle LevelWise request from peer (LevelWise sync responder)
                // Wrap stream in transport abstraction
                let mut transport = super::stream::StreamTransport::new(stream);

                // Get store for protocol execution
                let store = self.context_client.datastore_handle().into_inner();

                // Use the already-resolved our_identity from the top of handle_sync_request
                // (avoids redundant lookup and ensures consistency with other handlers)

                // Build the first request data (already parsed above for routing)
                let first_request = super::level_sync::LevelWiseFirstRequest {
                    level: first_level,
                    parent_ids: first_parent_ids,
                };

                // Run the LevelWise responder via the trait method
                use calimero_node_primitives::sync::SyncProtocolExecutor;
                super::level_sync::LevelWiseProtocol::run_responder(
                    &mut transport,
                    &store,
                    requested_context_id,
                    our_identity,
                    first_request,
                )
                .await?
            }
            InitPayload::EntityPush { .. } => {
                // EntityPush is handled within the HashComparison responder loop,
                // not as a top-level stream init. If received here, it means a
                // protocol error — the initiator sent EntityPush outside of a
                // HashComparison session. Log and ignore.
                warn!("Received EntityPush outside of HashComparison session, ignoring");
            }
            InitPayload::NamespaceBackfillRequest { .. } => {
                unreachable!("handled by early return above")
            }
            InitPayload::NamespaceJoinRequest { .. } => {
                unreachable!("handled by early return above")
            }
            InitPayload::OpenSubgroupJoinRequest { .. } => {
                unreachable!("handled by early return above")
            }
        };

        Ok(Some(()))
    }

    /// Schedule reconcile-via-anchor for every per-context hash
    /// mismatch in `report`. Called by the namespace governance op
    /// receive handler after `MemberRemoved` / `MemberLeft` apply
    /// reports state-hash divergence from the signed claims.
    ///
    /// One sync attempt per divergent context (`hash_differs`):
    /// pick a connected anchor peer (via the trusted-anchor set for
    /// the op's group + the verified `peer_identities` cache),
    /// initiate Snapshot sync against that peer, and after sync
    /// completes compare the receiver's new root hash against the
    /// signed expected. Mismatch on post-adoption verify is logged
    /// loudly — a follow-up will tighten this into pre-adoption
    /// rejection with rollback once the store has transactional
    /// staging.
    ///
    /// `only_in_expected` and `only_in_actual` entries are NOT
    /// reconciled here — those buckets reflect namespace-DAG
    /// drift (a registration the receiver hasn't seen yet, or a
    /// registration the signer hadn't seen). The cross-DAG
    /// membership check on subsequent state deltas catches that
    /// via `Unknown { needed }` → buffer; routing them through
    /// anchor sync would burn bandwidth on cases the existing
    /// catch-up path handles correctly.
    pub async fn reconcile_after_divergence(
        &self,
        report: calimero_context_client::messages::DivergenceReport,
    ) {
        if report.hash_differs.is_empty() {
            // Distinguish "no divergence at all" (debug-level
            // bookkeeping) from "group-level divergence with no
            // per-context mismatch" (operator-visible: a member row
            // is missing or extra somewhere, but every context the
            // op touched still hashes the same). The latter is rare
            // enough that we want it surfaced, not buried at debug.
            // Per-context reconcile doesn't apply — there's no
            // signed canonical hash for the group-state alone to
            // pull state against — so we log and return. Subsequent
            // signed ops carry the corrected group-state hash and
            // the namespace-DAG buffer + cross-DAG check on later
            // state deltas closes the gap.
            if report.group_hash_diverges {
                tracing::warn!(
                    group_id = %hex::encode(report.group_id.to_bytes()),
                    op_kind = report.op_kind,
                    only_in_expected_count = report.only_in_expected.len(),
                    only_in_actual_count = report.only_in_actual.len(),
                    "reconcile-after-divergence: group-state hash diverges from signed expected, \
                     but no per-context hash mismatch is reconcilable here — convergence relies \
                     on the cross-DAG check against subsequent signed ops"
                );
            } else {
                tracing::debug!(
                    group_id = %hex::encode(report.group_id.to_bytes()),
                    op_kind = report.op_kind,
                    only_in_expected_count = report.only_in_expected.len(),
                    only_in_actual_count = report.only_in_actual.len(),
                    "reconcile-after-divergence: no per-context hash mismatches to reconcile; \
                     namespace-DAG drift (if any) is handled by the cross-DAG check on \
                     subsequent state deltas"
                );
            }
            return;
        }

        for (context_id, expected_root_hash) in &report.hash_differs {
            self.reconcile_one_divergent_context(
                report.group_id,
                *context_id,
                *expected_root_hash,
                report.op_kind,
            )
            .await;
        }
    }

    /// Reconcile a single divergent context against a trusted anchor.
    ///
    /// Returns silently after logging — there is no error to bubble
    /// up to the caller because reconcile is best-effort: a future
    /// arrival of another signed op, or a sync interval tick, will
    /// re-attempt convergence. A hard error here would only inflate
    /// noise; the warn logs are the operator signal.
    ///
    /// Backoff: prior failed attempts for the same context impose an
    /// exponential cooldown (see [`reconcile_cooldown`]). Within that
    /// window, this is a no-op — the next signed op or sync tick will
    /// re-trigger once cooldown lapses. A successful post-adoption
    /// verify clears the backoff state immediately.
    ///
    /// **Convergence is not guaranteed in one shot**: `initiate_sync`
    /// negotiates the protocol via the standard handshake (typically
    /// `HashComparison` or `DeltaSync` between two initialized peers).
    /// Snapshot overwrite is gated by the `force=false` invariant in
    /// `fallback_to_snapshot_sync` and won't run on an initialized
    /// divergent node — that is by design, because snapshot adoption
    /// after the fact requires transactional staging the store layer
    /// doesn't yet provide. CRDT merge will sometimes converge two
    /// divergent states to the signed expected hash and sometimes
    /// won't (e.g. the partition-window case where the receiver holds
    /// a write the signer's expected hash excludes). When it doesn't,
    /// `verify_post_reconcile_root_hash` flags the mismatch and the
    /// backoff records a failure — operator-investigation territory
    /// until pre-adoption rejection + rollback lands.
    async fn reconcile_one_divergent_context(
        &self,
        group_id: calimero_context_config::types::ContextGroupId,
        context_id: ContextId,
        expected_root_hash: [u8; 32],
        op_kind: &'static str,
    ) {
        if let Some((remaining, failures)) =
            reconcile_remaining_cooldown(&self.node_state.reconcile_attempts, &context_id)
        {
            tracing::debug!(
                %context_id,
                op_kind,
                consecutive_failures = failures,
                cooldown_remaining_secs = remaining.as_secs(),
                "reconcile-after-divergence: skipping — prior attempts failed and the \
                 per-context cooldown is still active; will re-attempt after backoff lapses"
            );
            return;
        }

        // Look up anchors by `group_id` directly (carried in the
        // divergence report) rather than re-deriving the group from
        // `context_id`. A late-joiner can have a missing
        // context→group mapping locally even though the group's
        // trusted-anchor set is well-defined; the report already
        // names the group authoritatively so use it as the source of
        // truth.
        let anchors = self.anchor_identities_for_group(&group_id);
        if anchors.is_empty() {
            tracing::warn!(
                %context_id,
                group_id = %hex::encode(group_id.to_bytes()),
                op_kind,
                "reconcile-after-divergence: no trusted anchors defined for this group — \
                 falling back to operator path (no automatic recovery)"
            );
            return;
        }

        // Pick an anchor from the gossipsub mesh on the context's
        // topic. The mesh is a superset of "peers known to host this
        // context" — same source the regular sync path uses.
        //
        // Randomise the order before filtering so that, when there
        // are multiple connected anchors, we don't always pick the
        // one gossipsub happens to list first. Matters for two
        // reasons: (a) load distribution across honest anchors when
        // one is slow; (b) a compromised anchor that consistently
        // sorts first in libp2p's mesh order can't monopolise
        // reconcile syncs without contention. Post-adoption hash
        // verification against the signed expected still defends
        // against any anchor serving non-canonical state.
        let topic = TopicHash::from_raw(context_id);
        let mut mesh_peers = self.network_client.mesh_peers(topic).await;
        let mesh_peer_count = mesh_peers.len();
        mesh_peers.shuffle(&mut rand::thread_rng());
        // Walk mesh peers explicitly so cache-miss skips are visible
        // to operators. A peer with no `peer_identities` entry has not
        // yet been observed signing a verified message in this group;
        // it is invisible to the anchor predicate even if it would be
        // an anchor in practice. Counting and logging those skips
        // distinguishes "no anchors reachable" from "anchors reachable
        // but cache hasn't warmed yet" in the no-anchor warn below.
        let mut peers_missing_cache_entry: usize = 0;
        let mut peers_known_not_anchor: usize = 0;
        let anchor_peer = mesh_peers.iter().copied().find(|peer| {
            match self.node_state.peer_identities.get(peer) {
                Some(ids) => {
                    if ids.iter().any(|id| anchors.contains(id)) {
                        true
                    } else {
                        peers_known_not_anchor += 1;
                        false
                    }
                }
                None => {
                    peers_missing_cache_entry += 1;
                    tracing::debug!(
                        %context_id,
                        %peer,
                        op_kind,
                        "reconcile-after-divergence: mesh peer skipped — no peer_identities \
                         cache entry yet (peer has not been observed signing a verified \
                         message); cache warms as the peer's signed traffic is processed"
                    );
                    false
                }
            }
        });
        let Some(anchor_peer) = anchor_peer else {
            tracing::warn!(
                %context_id,
                op_kind,
                anchor_count = anchors.len(),
                connected_mesh_peers = mesh_peer_count,
                peers_missing_cache_entry,
                peers_known_not_anchor,
                "reconcile-after-divergence: no connected mesh peer matches the anchor set — \
                 falling back to operator path; reconcile will re-attempt on the next signed \
                 op or sync tick"
            );
            return;
        };

        tracing::info!(
            %context_id,
            %anchor_peer,
            op_kind,
            expected_root_hash = %hex::encode(expected_root_hash),
            "reconcile-after-divergence: pulling canonical state from trusted anchor"
        );

        match self.initiate_sync(context_id, anchor_peer).await {
            Ok((peer_used, protocol)) => {
                tracing::info!(
                    %context_id,
                    %peer_used,
                    ?protocol,
                    "reconcile-after-divergence: anchor sync completed; verifying post-adoption hash"
                );
                // Use `peer_used` (the peer the sync actually
                // resolved against) for verify-time logs rather than
                // the originally-picked `anchor_peer`. The two
                // normally agree, but `initiate_sync` is the
                // authoritative source.
                let converged = self.verify_post_reconcile_root_hash(
                    context_id,
                    expected_root_hash,
                    peer_used,
                    op_kind,
                );
                if converged {
                    record_reconcile_success(&self.node_state.reconcile_attempts, &context_id);
                } else {
                    let failures =
                        record_reconcile_failure(&self.node_state.reconcile_attempts, context_id);
                    tracing::warn!(
                        %context_id,
                        op_kind,
                        consecutive_failures = failures,
                        next_cooldown_secs = reconcile_cooldown(failures).as_secs(),
                        "reconcile-after-divergence: recorded failure; subsequent reconcile \
                         attempts for this context are gated by the backoff window"
                    );
                }
            }
            Err(err) => {
                let failures =
                    record_reconcile_failure(&self.node_state.reconcile_attempts, context_id);
                tracing::warn!(
                    %context_id,
                    %anchor_peer,
                    op_kind,
                    %err,
                    consecutive_failures = failures,
                    next_cooldown_secs = reconcile_cooldown(failures).as_secs(),
                    "reconcile-after-divergence: anchor sync failed; reconcile will re-attempt \
                     after the backoff window lapses"
                );
            }
        }
    }

    /// Check whether the local context's `root_hash` now equals the
    /// signed `expected_root_hash` from the op that triggered the
    /// reconcile.
    ///
    /// Returns `true` only when the context can be loaded and its root
    /// hash matches. Every other outcome returns `false` and is logged
    /// at warn:
    ///
    /// - the context lookup fails (the error is included in the log),
    /// - the context does not exist locally,
    /// - the hashes differ — either the anchor served state that does
    ///   not match the canonical expected, or local apply diverged
    ///   again after sync.
    ///
    /// A mismatch is operator-investigation territory; a follow-up will
    /// replace this post-adoption check with pre-adoption rejection +
    /// rollback once the store layer has transactional staging.
    ///
    /// `anchor_peer` and `op_kind` are used for log context only.
    fn verify_post_reconcile_root_hash(
        &self,
        context_id: ContextId,
        expected_root_hash: [u8; 32],
        anchor_peer: PeerId,
        op_kind: &'static str,
    ) -> bool {
        // Keep "lookup failed" and "context absent" apart: the former
        // points at the store, the latter at the sync itself, and the
        // underlying error is worth having in the log.
        let context = match self.context_client.get_context(&context_id) {
            Ok(Some(context)) => context,
            Ok(None) => {
                tracing::warn!(
                    %context_id,
                    %anchor_peer,
                    op_kind,
                    "reconcile-after-divergence: context not found locally after anchor sync — \
                     cannot verify root hash"
                );
                return false;
            }
            Err(err) => {
                tracing::warn!(
                    %context_id,
                    %anchor_peer,
                    op_kind,
                    %err,
                    "reconcile-after-divergence: failed to load context after anchor sync — \
                     cannot verify root hash"
                );
                return false;
            }
        };

        let actual_root_hash: [u8; 32] = *AsRef::<[u8; 32]>::as_ref(&context.root_hash);
        if actual_root_hash == expected_root_hash {
            tracing::info!(
                %context_id,
                %anchor_peer,
                op_kind,
                root_hash = %hex::encode(actual_root_hash),
                "reconcile-after-divergence: post-adoption hash matches signed expected — converged"
            );
            true
        } else {
            tracing::warn!(
                %context_id,
                %anchor_peer,
                op_kind,
                expected_root_hash = %hex::encode(expected_root_hash),
                actual_root_hash = %hex::encode(actual_root_hash),
                "reconcile-after-divergence: post-adoption hash does NOT match signed expected — \
                 either the anchor served non-canonical state or local apply diverged again; \
                 operator-investigation territory until pre-adoption rejection lands"
            );
            false
        }
    }
}

/// Backoff window between reconcile-after-divergence attempts, as a
/// function of how many attempts in a row have failed.
///
/// The window starts at 30s, doubles with each further failure and is
/// clamped to 30 min:
///
/// - 1 failure → 30s
/// - 2 failures → 60s
/// - 3 failures → 2m
/// - 4 failures → 4m
/// - 5 failures → 8m
/// - 6 failures → 16m
/// - 7+ failures → 30m (cap)
///
/// Callers are expected to pass at least `1` (the function is only
/// consulted once a failure has been recorded); `0` is tolerated and
/// yields the same 30s as `1`.
///
/// Kept as a free function so the backoff math can be unit-tested on
/// its own.
fn reconcile_cooldown(consecutive_failures: u32) -> std::time::Duration {
    use std::time::Duration;

    const BASE: Duration = Duration::from_secs(30);
    const CEILING: Duration = Duration::from_secs(30 * 60);
    // Bounding the number of doublings keeps the shift well inside the
    // integer width; the ceiling already applies from 7 failures on.
    const MAX_DOUBLINGS: u32 = 8;

    let doublings = consecutive_failures.saturating_sub(1).min(MAX_DOUBLINGS);
    BASE.saturating_mul(1u32 << doublings).min(CEILING)
}

/// Report how much of the backoff window is still left for
/// `context_id`.
///
/// Returns `Some((remaining_cooldown, consecutive_failures))` while a
/// recorded failure is still inside its cooldown window. Returns `None`
/// when there is no entry for the context, or when the time since the
/// last attempt has reached or exceeded the window.
fn reconcile_remaining_cooldown(
    attempts: &dashmap::DashMap<ContextId, crate::state::ReconcileAttempt>,
    context_id: &ContextId,
) -> Option<(std::time::Duration, u32)> {
    let attempt = attempts.get(context_id)?;
    let failures = attempt.consecutive_failures;
    let window = reconcile_cooldown(failures);
    let since_last_attempt = attempt.last_attempt_at.elapsed();

    // `checked_sub` is `None` once the window is overshot; an exact
    // zero remainder is treated the same way.
    window
        .checked_sub(since_last_attempt)
        .filter(|left| !left.is_zero())
        .map(|left| (left, failures))
}

/// Register one more failed reconcile attempt for `context_id`.
///
/// Increments `consecutive_failures` (saturating) and sets
/// `last_attempt_at` to the current instant, creating the entry on the
/// first failure. The updated failure count is returned so the caller
/// can log the upcoming cooldown without a second lookup.
fn record_reconcile_failure(
    attempts: &dashmap::DashMap<ContextId, crate::state::ReconcileAttempt>,
    context_id: ContextId,
) -> u32 {
    attempts
        .entry(context_id)
        // Existing entry: bump the streak and restart the window.
        .and_modify(|attempt| {
            attempt.consecutive_failures = attempt.consecutive_failures.saturating_add(1);
            attempt.last_attempt_at = std::time::Instant::now();
        })
        // First failure for this context: the streak starts at one.
        .or_insert_with(|| crate::state::ReconcileAttempt {
            last_attempt_at: std::time::Instant::now(),
            consecutive_failures: 1,
        })
        .consecutive_failures
}

/// Forget any recorded reconcile failures for `context_id`.
///
/// Called after a successful reconcile so that a later divergence of
/// the same context starts without an inherited cooldown. Removing an
/// absent entry is a no-op.
fn record_reconcile_success(
    attempts: &dashmap::DashMap<ContextId, crate::state::ReconcileAttempt>,
    context_id: &ContextId,
) {
    // The removed entry, if any, is of no further use.
    drop(attempts.remove(context_id));
}

/// Reorder `peers` in place so that every peer with an observed
/// trusted-anchor identity precedes every peer without one, keeping
/// the original relative order inside each of the two groups. Returns
/// the number of anchor peers, which is also the index where the
/// non-anchor peers begin.
///
/// A peer counts as an anchor when the identity set recorded for it in
/// `peer_identities` shares at least one key with `anchors`. Peers
/// with no `peer_identities` entry are non-anchors. When `anchors` is
/// empty the slice is left untouched and 0 is returned, since no peer
/// could qualify.
///
/// Each peer is classified exactly once, in a single pass, and both the
/// reordering and the returned count come from that one pass. The
/// `DashMap` is therefore consulted only once per peer, and a
/// concurrent cache mutation cannot make the count disagree with the
/// actual partition boundary.
///
/// Free function (not a method) so it can be unit-tested against
/// synthetic inputs without spinning up a sync manager.
fn partition_peers_anchor_first(
    peers: &mut [libp2p::PeerId],
    peer_identities: &dashmap::DashMap<
        libp2p::PeerId,
        std::collections::BTreeSet<calimero_primitives::identity::PublicKey>,
    >,
    anchors: &std::collections::BTreeSet<calimero_primitives::identity::PublicKey>,
) -> usize {
    if anchors.is_empty() {
        return 0;
    }

    // `Iterator::partition` fills both buckets in encounter order, so
    // the caller's (typically shuffled) order survives within each.
    let (mut ordered, non_anchors): (Vec<libp2p::PeerId>, Vec<libp2p::PeerId>) =
        peers.iter().copied().partition(|peer| {
            peer_identities
                .get(peer)
                .map_or(false, |ids| !ids.is_disjoint(anchors))
        });

    let anchor_count = ordered.len();
    ordered.extend(non_anchors);
    peers.copy_from_slice(&ordered);
    anchor_count
}

impl SyncManager {
    /// Actively request governance catch-up from a specific peer whose
    /// identity we don't yet recognize as a context member.
    ///
    /// Scenario: a peer opens a sync stream to us, but their identity isn't
    /// in our local governance DAG yet because fire-and-forget `MemberAdded`
    /// gossip (issue #2237) hasn't reached us. The legacy path waited 2 s
    /// for gossip and then closed the stream, stalling the initiator for
    /// up to 30 s (`NamespaceStateHeartbeat` cadence). Instead, open a
    /// separate stream back to the peer with `NamespaceBackfillRequest`
    /// (empty `delta_ids` = "send everything you have for this namespace"),
    /// apply every op they return, and let the caller re-check membership.
    ///
    /// Flow:
    ///
    /// 1. Resolve the namespace of `context_id` through its group.
    /// 2. Open a fresh stream to `peer_id` and send the request.
    /// 3. Wait up to `sync_config.timeout` for a
    ///    `NamespaceBackfillResponse`.
    /// 4. Decode and apply each returned op; ops that fail to decode or
    ///    apply are skipped.
    /// 5. Notify the node client once if at least one op reported
    ///    `NamespaceApplyOutcome::Applied`.
    ///
    /// Best-effort: any failure (no group resolved, stream open fails,
    /// peer returns no ops, ops fail to apply) is logged at debug and the
    /// caller proceeds to close the stream as before. The real fix is the
    /// three-phase contract in #2237; this is a responder-side bandaid
    /// that turns a 30 s stall into at worst a second round-trip.
    ///
    /// `their_identity` is used for log context only.
    async fn request_governance_catchup_from_peer(
        &self,
        peer_id: PeerId,
        context_id: &ContextId,
        their_identity: &PublicKey,
    ) {
        let store = self.context_client.datastore();
        let namespace_id =
            match calimero_context::group_store::get_group_for_context(store, context_id) {
                Ok(Some(group_id)) => {
                    match calimero_context::group_store::resolve_namespace(store, &group_id) {
                        Ok(ns) => ns.to_bytes(),
                        Err(err) => {
                            debug!(
                                %context_id,
                                %their_identity,
                                %err,
                                "failed to resolve namespace for governance catch-up"
                            );
                            return;
                        }
                    }
                }
                Ok(None) => {
                    debug!(
                        %context_id,
                        %their_identity,
                        "context not in a group — no namespace to request catch-up from"
                    );
                    return;
                }
                Err(err) => {
                    debug!(
                        %context_id,
                        %their_identity,
                        %err,
                        "failed to resolve group for governance catch-up"
                    );
                    return;
                }
            };

        let mut stream = match self.network_client.open_stream(peer_id).await {
            Ok(s) => s,
            Err(err) => {
                debug!(
                    %context_id,
                    %their_identity,
                    %peer_id,
                    %err,
                    "failed to open catch-up stream to peer"
                );
                return;
            }
        };

        // The envelope's `context_id` / `party_id` are all-zero
        // placeholders; the request itself is addressed by
        // `namespace_id`.
        // NOTE(review): presumably the responder ignores the envelope
        // fields for this payload — confirm against the request handler.
        let msg = StreamMessage::Init {
            context_id: ContextId::from([0u8; 32]),
            party_id: PublicKey::from([0u8; 32]),
            payload: InitPayload::NamespaceBackfillRequest {
                namespace_id,
                // Empty list = "send everything for this namespace".
                delta_ids: Vec::new(),
            },
            next_nonce: rand::thread_rng().gen(),
        };

        if let Err(err) = super::stream::send(&mut stream, &msg, None).await {
            debug!(
                %context_id,
                %their_identity,
                %peer_id,
                %err,
                "failed to send NamespaceBackfillRequest during catch-up"
            );
            return;
        }

        // Anything other than a `NamespaceBackfillResponse` — including
        // the stream yielding no message at all — ends the attempt.
        let response = match super::stream::recv(&mut stream, None, self.sync_config.timeout).await
        {
            Ok(Some(StreamMessage::Message {
                payload: MessagePayload::NamespaceBackfillResponse { deltas },
                ..
            })) => deltas,
            Ok(_) => {
                debug!(
                    %context_id,
                    %their_identity,
                    %peer_id,
                    "unexpected response to NamespaceBackfillRequest during catch-up"
                );
                return;
            }
            Err(err) => {
                debug!(
                    %context_id,
                    %their_identity,
                    %peer_id,
                    %err,
                    "catch-up NamespaceBackfillRequest timed out or failed"
                );
                return;
            }
        };

        if response.is_empty() {
            debug!(
                %context_id,
                %their_identity,
                %peer_id,
                "peer returned no namespace ops for catch-up"
            );
            return;
        }

        use calimero_context_client::messages::NamespaceApplyOutcome;
        let ops_count = response.len();
        // `applied` counts every op the apply call accepted without an
        // error, whatever the outcome; `newly_applied` counts only the
        // `Applied` outcomes, i.e. actual progress.
        let mut applied = 0usize;
        let mut newly_applied = 0usize;
        for (_delta_id, op_bytes) in response {
            let op = match borsh::from_slice::<
                calimero_context_client::local_governance::SignedNamespaceOp,
            >(&op_bytes)
            {
                Ok(o) => o,
                Err(err) => {
                    debug!(
                        %context_id,
                        %their_identity,
                        %err,
                        "failed to decode catch-up op"
                    );
                    continue;
                }
            };
            match self.context_client.apply_signed_namespace_op(op).await {
                Ok(NamespaceApplyOutcome::Applied { .. }) => {
                    applied += 1;
                    newly_applied += 1;
                }
                Ok(_) => {
                    applied += 1;
                }
                Err(err) => {
                    debug!(
                        %context_id,
                        %their_identity,
                        %err,
                        "failed to apply catch-up op"
                    );
                    continue;
                }
            }
        }

        // Single FSM notification after the batch when we actually
        // advanced the local applied_through. `Pending` (parents missing)
        // and `Duplicate` outcomes are no-progress from the FSM's POV,
        // so we skip the mailbox hop in those cases. Mirrors the gate
        // used at `network_event/namespace.rs:120`.
        if newly_applied > 0 {
            self.node_client.notify_namespace_op_applied(namespace_id);
        }

        // `ops_applied` alone cannot tell real progress from a batch of
        // no-progress outcomes, so the `Applied`-only count is reported
        // alongside it.
        debug!(
            %context_id,
            %their_identity,
            %peer_id,
            ops_received = ops_count,
            ops_applied = applied,
            ops_newly_applied = newly_applied,
            "governance catch-up complete"
        );
    }

    /// Handle a namespace backfill request: look up full `SignedNamespaceOp`
    /// payloads for the requested delta IDs and send them back.
    ///
    /// We scan the namespace governance op store for matching delta IDs.
    /// For each requested delta, if we have the full op (stored when we were
    /// a member at apply time), we include it in the response.
    async fn handle_namespace_backfill_request(
        &self,
        namespace_id: [u8; 32],
        delta_ids: &[[u8; 32]],
        stream: &mut Stream,
        nonce: Nonce,
    ) -> eyre::Result<()> {
        let store = self.context_client.datastore_handle().into_inner();
        let handle = store.handle();
        let mut found = Vec::new();

        /// Maximum ops returned in a single backfill response to prevent
        /// memory exhaustion from large namespace governance DAGs.
        const MAX_BACKFILL_OPS: usize = 500;

        if delta_ids.is_empty() {
            // Empty request = "give me everything for this namespace".
            let start = calimero_store::key::NamespaceGovOp::new(namespace_id, [0u8; 32]);
            let mut iter = handle.iter::<calimero_store::key::NamespaceGovOp>()?;
            let first = iter.seek(start).transpose();

            for entry in first.into_iter().chain(iter.keys()) {
                let key = match entry {
                    Ok(k) => k,
                    Err(_) => break,
                };
                if key.namespace_id() != namespace_id {
                    break;
                }
                if let Ok(Some(value)) = handle.get(&key) {
                    if let Some(signed_bytes) =
                        crate::sync::helpers::extract_signed_op_bytes(&value.skeleton_bytes)
                    {
                        found.push((key.delta_id(), signed_bytes));
                        if found.len() >= MAX_BACKFILL_OPS {
                            break;
                        }
                    }
                }
            }
        } else {
            for delta_id in delta_ids.iter().take(MAX_BACKFILL_OPS) {
                let key = calimero_store::key::NamespaceGovOp::new(namespace_id, *delta_id);
                if let Ok(Some(value)) = handle.get(&key) {
                    if let Some(signed_bytes) =
                        crate::sync::helpers::extract_signed_op_bytes(&value.skeleton_bytes)
                    {
                        found.push((*delta_id, signed_bytes));
                    }
                }
            }
        }

        let msg = StreamMessage::Message {
            sequence_id: 0,
            payload: MessagePayload::NamespaceBackfillResponse { deltas: found },
            next_nonce: nonce,
        };
        super::stream::send(stream, &msg, None).await?;
        Ok(())
    }

    /// Handle an incoming `NamespaceJoinRequest` on the responder side.
    ///
    /// Steps, in order:
    ///
    /// 1. Decode `invitation_bytes` as a `SignedGroupOpenInvitation`; reply
    ///    with `NamespaceJoinRejected` if it does not decode.
    /// 2. Load the group metadata for `namespace_id`; reply with
    ///    `NamespaceJoinRejected` ("group not found") if it is absent.
    /// 3. Wrap the current group key for `joiner_public_key` using the local
    ///    namespace identity. Any failure here is logged and yields an empty
    ///    envelope rather than a rejection.
    /// 4. Pre-register the joiner as a `Member` and write an empty
    ///    `ContextIdentity` for every context of the group (best-effort).
    /// 5. Collect the namespace governance ops and the default capabilities
    ///    and send a `NamespaceJoinResponse`.
    ///
    /// NOTE(review): the invitation is only checked for well-formedness in
    /// this method; its contents and signature are not inspected here, yet
    /// the joiner is registered and handed the wrapped key. Confirm that
    /// authorisation is enforced elsewhere.
    ///
    /// # Errors
    ///
    /// Returns an error when a store lookup propagated with `?` fails or when
    /// a reply cannot be sent on `stream`.
    async fn handle_namespace_join_request(
        &self,
        namespace_id: [u8; 32],
        invitation_bytes: &[u8],
        joiner_public_key: PublicKey,
        stream: &mut Stream,
        nonce: Nonce,
    ) -> eyre::Result<()> {
        use calimero_context::group_store::{
            add_group_member, enumerate_group_contexts, get_default_capabilities,
            load_current_group_key, load_group_meta, resolve_namespace_identity_record,
            wrap_group_key_for_member,
        };
        use calimero_context_config::types::ContextGroupId;
        use calimero_context_config::types::SignedGroupOpenInvitation;

        /// Reply with `NamespaceJoinRejected { reason }`.
        async fn reject(stream: &mut Stream, nonce: Nonce, reason: String) -> eyre::Result<()> {
            let msg = StreamMessage::Message {
                sequence_id: 0,
                payload: MessagePayload::NamespaceJoinRejected { reason },
                next_nonce: nonce,
            };
            super::stream::send(stream, &msg, None).await?;
            Ok(())
        }

        // Well-formedness check only; the decoded value is not used further.
        if let Err(err) = borsh::from_slice::<SignedGroupOpenInvitation>(invitation_bytes) {
            return reject(stream, nonce, format!("invalid invitation: {err}")).await;
        }

        let group_id = ContextGroupId::from(namespace_id);
        let store = self.context_client.datastore_handle().into_inner();

        let Some(meta) = load_group_meta(&store, &group_id)? else {
            return reject(stream, nonce, "group not found".to_owned()).await;
        };

        // An empty envelope tells the joiner that no key could be delivered.
        let key_envelope_bytes = match load_current_group_key(&store, &group_id)? {
            None => Vec::new(),
            Some((_key_id, group_key)) => {
                match resolve_namespace_identity_record(&store, &group_id)? {
                    None => {
                        warn!(
                            namespace_id = %hex::encode(namespace_id),
                            "no namespace identity found, cannot wrap key"
                        );
                        Vec::new()
                    }
                    Some(record) => {
                        let sender_sk =
                            calimero_primitives::identity::PrivateKey::from(record.private_key);
                        match wrap_group_key_for_member(&sender_sk, &joiner_public_key, &group_key)
                        {
                            Ok(envelope) => match borsh::to_vec(&envelope) {
                                Ok(bytes) => bytes,
                                Err(err) => {
                                    // Previously swallowed: the joiner would
                                    // just see a missing key with no trace
                                    // on the responder.
                                    warn!(
                                        namespace_id = %hex::encode(namespace_id),
                                        %err,
                                        "failed to serialize wrapped group key envelope"
                                    );
                                    Vec::new()
                                }
                            },
                            Err(err) => {
                                warn!(
                                    namespace_id = %hex::encode(namespace_id),
                                    %err,
                                    "failed to wrap group key for joiner"
                                );
                                Vec::new()
                            }
                        }
                    }
                }
            }
        };

        // Pre-register the joiner as a group member and write ContextIdentity
        // entries so that when the joiner opens a sync stream, this node's
        // membership check (has_member) passes immediately.
        if let Err(e) = add_group_member(
            &store,
            &group_id,
            &joiner_public_key,
            calimero_primitives::context::GroupMemberRole::Member,
        ) {
            warn!(%e, "failed to pre-register joiner as group member");
        }

        let context_ids = enumerate_group_contexts(&store, &group_id, 0, usize::MAX)?;
        let application_id: [u8; 32] = *meta.target_application_id.as_ref();

        for ctx_id in &context_ids {
            let ci_key = calimero_store::key::ContextIdentity::new(*ctx_id, joiner_public_key);
            let mut handle = store.handle();

            match handle.has(&ci_key) {
                // Never overwrite an existing identity record.
                Ok(true) => continue,
                Ok(false) => {}
                Err(err) => {
                    // Same fallback as before (treat as absent), now visible.
                    warn!(
                        namespace_id = %hex::encode(namespace_id),
                        context_id = ?ctx_id,
                        ?err,
                        "failed to check for existing ContextIdentity, attempting write"
                    );
                }
            }

            if let Err(err) = handle.put(
                &ci_key,
                &calimero_store::types::ContextIdentity {
                    private_key: None,
                    sender_key: None,
                },
            ) {
                warn!(
                    namespace_id = %hex::encode(namespace_id),
                    context_id = ?ctx_id,
                    ?err,
                    "failed to write ContextIdentity for joiner"
                );
            }
        }

        let governance_ops = self.collect_namespace_governance_ops(namespace_id)?;

        // Issue #2256: the namespace's default-capabilities value travels
        // with the bundle so the joiner doesn't need to fall back to a
        // hard-coded constant. Read whatever the responder currently
        // believes (already reflects any admin-issued
        // `DefaultCapabilitiesSet` ops because the local store is
        // updated as those ops apply). `unwrap_or(0)` matches the
        // pre-existing semantics for "default key absent."
        let default_capabilities = get_default_capabilities(&store, &group_id)?.unwrap_or(0);

        debug!(
            namespace_id = %hex::encode(namespace_id),
            has_key = !key_envelope_bytes.is_empty(),
            context_count = context_ids.len(),
            app_id = %hex::encode(application_id),
            governance_ops_count = governance_ops.len(),
            default_capabilities,
            "Sending NamespaceJoinResponse"
        );

        let msg = StreamMessage::Message {
            sequence_id: 0,
            payload: MessagePayload::NamespaceJoinResponse {
                key_envelope_bytes,
                context_ids,
                application_id,
                governance_ops,
                default_capabilities,
            },
            next_nonce: nonce,
        };
        super::stream::send(stream, &msg, None).await?;
        Ok(())
    }

    /// Handle an incoming `OpenSubgroupJoinRequest` (issue #2357) on the
    /// responder side.
    ///
    /// The request is answered with `OpenSubgroupJoinRejected` when, in this
    /// order:
    ///
    /// 1. the subgroup does not resolve to `namespace_id` (or resolution
    ///    fails),
    /// 2. the subgroup has no metadata in the local store, or
    /// 3. `check_group_membership_path` reports `MembershipPath::None` for
    ///    the joiner.
    ///
    /// Otherwise the current subgroup key is wrapped for `joiner_public_key`
    /// with the local namespace identity and returned in an
    /// `OpenSubgroupJoinResponse`. If the key, the identity, the wrapping or
    /// the serialization is unavailable, the response carries an empty
    /// envelope and the cause is logged.
    ///
    /// # Errors
    ///
    /// Returns an error when a store lookup propagated with `?` fails or when
    /// a reply cannot be sent on `stream`.
    async fn handle_open_subgroup_join_request(
        &self,
        namespace_id: [u8; 32],
        subgroup_id: [u8; 32],
        joiner_public_key: PublicKey,
        stream: &mut Stream,
        nonce: Nonce,
    ) -> eyre::Result<()> {
        use calimero_context::group_store::{
            check_group_membership_path, load_current_group_key, load_group_meta,
            resolve_namespace, resolve_namespace_identity_record, wrap_group_key_for_member,
            MembershipPath,
        };
        use calimero_context_config::types::ContextGroupId;

        /// Reply with `OpenSubgroupJoinRejected { reason }`.
        async fn reject(stream: &mut Stream, nonce: Nonce, reason: String) -> eyre::Result<()> {
            let msg = StreamMessage::Message {
                sequence_id: 0,
                payload: MessagePayload::OpenSubgroupJoinRejected { reason },
                next_nonce: nonce,
            };
            super::stream::send(stream, &msg, None).await?;
            Ok(())
        }

        let subgroup_gid = ContextGroupId::from(subgroup_id);
        let store = self.context_client.datastore_handle().into_inner();

        // Cross-namespace pin: the requested subgroup must belong to the
        // namespace the joiner named, otherwise an attacker on namespace
        // A could elicit a key for a subgroup of namespace B.
        let pin_failure = match resolve_namespace(&store, &subgroup_gid) {
            Ok(ns) if ns.to_bytes() == namespace_id => None,
            Ok(other_ns) => Some(format!(
                "subgroup belongs to namespace {} not {}",
                hex::encode(other_ns.to_bytes()),
                hex::encode(namespace_id),
            )),
            Err(err) => Some(format!("resolve namespace: {err}")),
        };
        if let Some(reason) = pin_failure {
            return reject(stream, nonce, reason).await;
        }

        if load_group_meta(&store, &subgroup_gid)?.is_none() {
            return reject(stream, nonce, "subgroup not found locally".to_owned()).await;
        }

        // Authorisation check: the joiner must have some membership path to
        // the subgroup. Both `Inherited` (per the original note, implies
        // every intermediate ancestor was Open, see `membership.rs:267`)
        // and `Direct` membership are accepted; only `None` is refused.
        match check_group_membership_path(&store, &subgroup_gid, &joiner_public_key)? {
            MembershipPath::Inherited { .. } | MembershipPath::Direct => {}
            MembershipPath::None => {
                return reject(
                    stream,
                    nonce,
                    "joiner has no membership path to subgroup".to_owned(),
                )
                .await;
            }
        }

        // An empty envelope is what the initiator treats as "responder did
        // not hold the subgroup key", so every other cause is logged here.
        let key_envelope_bytes = match load_current_group_key(&store, &subgroup_gid)? {
            None => Vec::new(),
            Some((_key_id, group_key)) => {
                let ns_gid = ContextGroupId::from(namespace_id);
                match resolve_namespace_identity_record(&store, &ns_gid)? {
                    None => {
                        warn!(
                            namespace_id = %hex::encode(namespace_id),
                            "no namespace identity, cannot wrap subgroup key"
                        );
                        Vec::new()
                    }
                    Some(record) => {
                        let sender_sk =
                            calimero_primitives::identity::PrivateKey::from(record.private_key);
                        match wrap_group_key_for_member(&sender_sk, &joiner_public_key, &group_key)
                        {
                            Ok(envelope) => match borsh::to_vec(&envelope) {
                                Ok(bytes) => bytes,
                                Err(err) => {
                                    // Previously swallowed into an empty
                                    // envelope with no trace.
                                    warn!(
                                        namespace_id = %hex::encode(namespace_id),
                                        subgroup_id = %hex::encode(subgroup_id),
                                        %err,
                                        "failed to serialize wrapped subgroup key envelope"
                                    );
                                    Vec::new()
                                }
                            },
                            Err(err) => {
                                warn!(
                                    namespace_id = %hex::encode(namespace_id),
                                    subgroup_id = %hex::encode(subgroup_id),
                                    %err,
                                    "failed to wrap subgroup key for joiner"
                                );
                                Vec::new()
                            }
                        }
                    }
                }
            }
        };

        debug!(
            namespace_id = %hex::encode(namespace_id),
            subgroup_id = %hex::encode(subgroup_id),
            has_key = !key_envelope_bytes.is_empty(),
            "Sending OpenSubgroupJoinResponse"
        );

        let msg = StreamMessage::Message {
            sequence_id: 0,
            payload: MessagePayload::OpenSubgroupJoinResponse { key_envelope_bytes },
            next_nonce: nonce,
        };
        super::stream::send(stream, &msg, None).await?;
        Ok(())
    }

    /// Initiator side for `request_open_subgroup_join`. Picks a mesh peer
    /// on the namespace topic, opens a stream, sends the request, and
    /// returns the wrapped key envelope. Same peer-discovery retry loop
    /// as `initiate_namespace_join`.
    async fn initiate_open_subgroup_join(
        &self,
        params: OpenSubgroupJoinParams,
    ) -> eyre::Result<Vec<u8>> {
        let topic = libp2p::gossipsub::TopicHash::from_raw(format!(
            "ns/{}",
            hex::encode(params.namespace_id)
        ));

        let mut peers = Vec::new();
        for attempt in 1..=super::config::DEFAULT_MESH_RETRIES_UNINITIALIZED {
            peers = self.network_client.mesh_peers(topic.clone()).await;
            if !peers.is_empty() {
                break;
            }
            if attempt < super::config::DEFAULT_MESH_RETRIES_UNINITIALIZED {
                debug!(
                    namespace_id = %hex::encode(params.namespace_id),
                    subgroup_id = %hex::encode(params.subgroup_id),
                    attempt,
                    "No namespace mesh peers yet for open-subgroup join, retrying..."
                );
                time::sleep(std::time::Duration::from_millis(
                    super::config::DEFAULT_MESH_RETRY_DELAY_MS_UNINITIALIZED,
                ))
                .await;
            }
        }

        let peer = peers.first().ok_or_else(|| {
            eyre::eyre!(
                "no mesh peers for namespace {} (open-subgroup join)",
                hex::encode(params.namespace_id)
            )
        })?;

        let mut stream = self
            .network_client
            .open_stream(*peer)
            .await
            .wrap_err("open stream for open-subgroup join")?;

        let msg = StreamMessage::Init {
            context_id: calimero_primitives::context::ContextId::from([0u8; 32]),
            party_id: params.joiner_public_key,
            payload: InitPayload::OpenSubgroupJoinRequest {
                namespace_id: params.namespace_id,
                subgroup_id: params.subgroup_id,
                joiner_public_key: params.joiner_public_key,
            },
            next_nonce: rand::thread_rng().gen(),
        };

        super::stream::send(&mut stream, &msg, None).await?;

        match super::stream::recv(&mut stream, None, self.sync_config.timeout).await? {
            Some(StreamMessage::Message {
                payload: MessagePayload::OpenSubgroupJoinResponse { key_envelope_bytes },
                ..
            }) => {
                if key_envelope_bytes.is_empty() {
                    eyre::bail!(
                        "responder did not hold the subgroup key for {}",
                        hex::encode(params.subgroup_id)
                    );
                }
                Ok(key_envelope_bytes)
            }
            Some(StreamMessage::Message {
                payload: MessagePayload::OpenSubgroupJoinRejected { reason },
                ..
            }) => {
                eyre::bail!("open-subgroup join rejected: {}", reason)
            }
            other => {
                eyre::bail!(
                    "unexpected response to open-subgroup join request: {:?}",
                    other.as_ref().map(|m| std::mem::discriminant(m))
                )
            }
        }
    }

    /// Collect all governance ops for a namespace (reused by the join responder).
    ///
    /// Returns bare `SignedNamespaceOp` bytes (not `StoredNamespaceEntry` wrapped)
    /// so recipients can `borsh::from_slice::<SignedNamespaceOp>` directly.
    fn collect_namespace_governance_ops(
        &self,
        namespace_id: [u8; 32],
    ) -> eyre::Result<Vec<Vec<u8>>> {
        let store = self.context_client.datastore_handle().into_inner();
        let handle = store.handle();
        let mut ops = Vec::new();

        let start = calimero_store::key::NamespaceGovOp::new(namespace_id, [0u8; 32]);
        let mut iter = handle.iter::<calimero_store::key::NamespaceGovOp>()?;
        let first = iter.seek(start).transpose();

        for entry in first.into_iter().chain(iter.keys()) {
            let key = match entry {
                Ok(k) => k,
                Err(_) => break,
            };
            if key.namespace_id() != namespace_id {
                break;
            }
            if let Ok(Some(value)) = handle.get(&key) {
                if let Some(bytes) =
                    crate::sync::helpers::extract_signed_op_bytes(&value.skeleton_bytes)
                {
                    ops.push(bytes);
                }
            }
        }

        Ok(ops)
    }

    /// Initiator side: open a stream to a mesh peer and perform the
    /// NamespaceJoinRequest / NamespaceJoinResponse exchange.
    async fn initiate_namespace_join(
        &self,
        params: NamespaceJoinParams,
    ) -> eyre::Result<JoinBundle> {
        let topic = libp2p::gossipsub::TopicHash::from_raw(format!(
            "ns/{}",
            hex::encode(params.namespace_id)
        ));

        // Retry peer discovery: gossipsub mesh for the namespace topic may
        // still be forming when this runs (mDNS + GRAFT exchange can take
        // several seconds). Use the same retry window as uninitialized
        // context sync (10 × 1 s = 10 s) to ensure we don't give up too
        // quickly in slower environments (CI, Docker, mDNS cold-start).
        let mut peers = Vec::new();
        for attempt in 1..=super::config::DEFAULT_MESH_RETRIES_UNINITIALIZED {
            peers = self.network_client.mesh_peers(topic.clone()).await;
            if !peers.is_empty() {
                break;
            }
            if attempt < super::config::DEFAULT_MESH_RETRIES_UNINITIALIZED {
                debug!(
                    namespace_id = %hex::encode(params.namespace_id),
                    attempt,
                    "No namespace mesh peers yet, retrying..."
                );
                time::sleep(std::time::Duration::from_millis(
                    super::config::DEFAULT_MESH_RETRY_DELAY_MS_UNINITIALIZED,
                ))
                .await;
            }
        }

        let peer = peers.first().ok_or_else(|| {
            eyre::eyre!(
                "no mesh peers for namespace {}",
                hex::encode(params.namespace_id)
            )
        })?;

        let mut stream = self
            .network_client
            .open_stream(*peer)
            .await
            .wrap_err("open stream for namespace join")?;

        let msg = StreamMessage::Init {
            context_id: calimero_primitives::context::ContextId::from([0u8; 32]),
            party_id: params.joiner_public_key,
            payload: InitPayload::NamespaceJoinRequest {
                namespace_id: params.namespace_id,
                invitation_bytes: params.invitation_bytes,
                joiner_public_key: params.joiner_public_key,
            },
            next_nonce: rand::thread_rng().gen(),
        };

        super::stream::send(&mut stream, &msg, None).await?;

        match super::stream::recv(&mut stream, None, self.sync_config.timeout).await? {
            Some(StreamMessage::Message {
                payload:
                    MessagePayload::NamespaceJoinResponse {
                        key_envelope_bytes,
                        context_ids,
                        application_id,
                        governance_ops,
                        default_capabilities,
                    },
                ..
            }) => Ok(JoinBundle {
                key_envelope_bytes,
                context_ids,
                application_id: application_id.into(),
                governance_ops,
                default_capabilities,
            }),
            Some(StreamMessage::Message {
                payload: MessagePayload::NamespaceJoinRejected { reason },
                ..
            }) => {
                eyre::bail!("namespace join rejected: {}", reason)
            }
            other => {
                eyre::bail!(
                    "unexpected response to namespace join request: {:?}",
                    other.as_ref().map(|m| std::mem::discriminant(m))
                )
            }
        }
    }

    /// Pull all namespace governance ops from a mesh peer.
    async fn sync_namespace_from_peer(&self, namespace_id: [u8; 32]) {
        use calimero_node_primitives::sync::{InitPayload, MessagePayload, StreamMessage};

        let topic =
            libp2p::gossipsub::TopicHash::from_raw(format!("ns/{}", hex::encode(namespace_id)));
        let peers = self.network_client.mesh_peers(topic).await;
        let Some(peer) = peers.first() else {
            debug!(
                namespace_id = %hex::encode(namespace_id),
                "no mesh peers for namespace sync"
            );
            return;
        };

        let Ok(mut stream) = self.network_client.open_stream(*peer).await else {
            debug!("failed to open stream for namespace sync");
            return;
        };

        let msg = StreamMessage::Init {
            context_id: calimero_primitives::context::ContextId::from([0u8; 32]),
            party_id: calimero_primitives::identity::PublicKey::from([0u8; 32]),
            payload: InitPayload::NamespaceBackfillRequest {
                namespace_id,
                delta_ids: vec![],
            },
            next_nonce: {
                use rand::Rng;
                rand::thread_rng().gen()
            },
        };

        if let Err(err) = super::stream::send(&mut stream, &msg, None).await {
            debug!(%err, "failed to send NamespaceBackfillRequest");
            return;
        }

        match super::stream::recv(&mut stream, None, self.sync_config.timeout).await {
            Ok(Some(StreamMessage::Message {
                payload: MessagePayload::NamespaceBackfillResponse { deltas },
                ..
            })) => {
                info!(
                    namespace_id = %hex::encode(namespace_id),
                    ops = deltas.len(),
                    "received namespace governance ops from peer"
                );
                use calimero_context_client::messages::NamespaceApplyOutcome;
                let mut newly_applied = false;
                // Collect divergence reports surfaced by `MemberRemoved` /
                // `MemberLeft` ops arriving via the namespace-backfill
                // path. Same reasoning as the gossip-receive path: once
                // the DAG marks an op `Applied`, any later gossipsub
                // arrival of the same op becomes `Duplicate` and the
                // apply work — including the post-apply hash check —
                // is skipped. If a `MemberRemoved` op arrives first via
                // backfill and divergence is dropped here, no later
                // path will re-surface it. Fire reconcile after the
                // batch loop so we don't hold `&mut` borrows across an
                // await on `self`.
                let mut pending_divergences: Vec<
                    calimero_context_client::messages::DivergenceReport,
                > = Vec::new();
                for (delta_id, op_bytes) in deltas {
                    match borsh::from_slice::<
                        calimero_context_client::local_governance::SignedNamespaceOp,
                    >(&op_bytes)
                    {
                        Ok(op) => {
                            match self
                                .context_client
                                .apply_signed_namespace_op(op.clone())
                                .await
                            {
                                Err(err) => {
                                    // Capture enough context to diagnose codec/schema
                                    // mismatches (observed as "Unexpected length of
                                    // input" from the inner GroupOp decode when a
                                    // variant's binary layout has drifted). The
                                    // op-type tag + byte-length give us a fingerprint
                                    // without logging potentially sensitive payload.
                                    let op_kind = match &op.op {
                                        calimero_context_client::local_governance::NamespaceOp::Root(r) => {
                                            format!("Root::{r:?}").split('{').next().unwrap_or("Root").trim().to_owned()
                                        }
                                        calimero_context_client::local_governance::NamespaceOp::Group { .. } => {
                                            "Group".to_owned()
                                        }
                                    };
                                    warn!(
                                        namespace_id = %hex::encode(namespace_id),
                                        delta_id = %hex::encode(delta_id),
                                        op_kind = %op_kind,
                                        signer = %op.signer,
                                        nonce = op.nonce,
                                        op_bytes_len = op_bytes.len(),
                                        ?err,
                                        "failed to apply namespace governance op from backfill"
                                    );
                                }
                                Ok(NamespaceApplyOutcome::Applied { divergence }) => {
                                    newly_applied = true;
                                    if let Some(report) = divergence {
                                        pending_divergences.push(report);
                                    }
                                    // Only react to a *newly-applied*
                                    // `MemberJoined`. On `Duplicate`
                                    // (the common case — a backfill
                                    // re-sends the whole DAG every
                                    // round) re-publishing a fresh
                                    // `KeyDelivery` each time would
                                    // grow the namespace governance
                                    // DAG without bound until it hits
                                    // the backfill cap and never
                                    // converges again (#2319).
                                    crate::key_delivery::maybe_publish_key_delivery(
                                        &self.context_client,
                                        &self.node_client,
                                        &op,
                                    )
                                    .await;
                                }
                                Ok(_) => {}
                            }
                        }
                        Err(err) => {
                            warn!(
                                namespace_id = %hex::encode(namespace_id),
                                delta_id = %hex::encode(delta_id),
                                op_bytes_len = op_bytes.len(),
                                op_bytes_prefix = %hex::encode(&op_bytes[..op_bytes.len().min(64)]),
                                %err,
                                "failed to decode namespace governance op from backfill"
                            );
                        }
                    }
                }
                // FSM notify after the batch — gated on at least one
                // `Applied` outcome (Pending/Duplicate are no-progress).
                // See the governance-catch-up notify above for rationale.
                if newly_applied {
                    self.node_client.notify_namespace_op_applied(namespace_id);
                }

                // Route any divergence reports surfaced during the
                // backfill apply loop to the reconcile-via-anchor path.
                // Run sequentially after the batch finishes; we're
                // already in an async method on `&self` so no spawn
                // is needed here (the gossip-receive path uses
                // `actix::spawn` because it runs inside an actor's
                // mailbox slot; this method is invoked by the sync
                // tick which has no such constraint).
                for report in pending_divergences {
                    self.reconcile_after_divergence(report).await;
                }
            }
            _ => {
                debug!("unexpected response to namespace sync request");
            }
        }
    }
}

#[cfg(test)]
mod tests;