// blvm-node 0.1.31

//! ChunkAssigner assigns height-ordered chunks to workers. ChunkGuard ensures
//! chunks are re-queued on drop if not disarmed.

use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use super::types::ChunkWorkItem;
use super::ParallelIBDConfig;

/// Chunk of blocks to download, assigned to a specific peer.
///
/// Produced by `create_chunks`, which treats both heights as inclusive bounds:
/// the chunk after this one starts at `end_height + 1`.
#[derive(Debug, Clone)]
pub struct BlockChunk {
    /// First block height covered by the chunk (inclusive).
    pub start_height: u64,
    /// Last block height covered by the chunk (inclusive).
    pub end_height: u64,
    /// Peer the chunk is assigned to. `create_chunks` leaves this empty when it is
    /// called with no peers.
    pub peer_id: String,
}

/// Create chunks for parallel download.
///
/// Splits the inclusive height range `[start_height, end_height]` into contiguous
/// chunks and binds each one to a peer.
///
/// Peer selection:
/// - Earliest-first: when `config.mode` equals `"earliest"` (ASCII case-insensitive) or
///   `config.earliest_first` is set, more than one peer is available, and `scored_peers`
///   is non-empty, every chunk goes to the highest-scoring peer (Core-like; avoids
///   chunk-boundary stalls when a slow peer holds the next chunk). Scores that cannot be
///   ordered (NaN) compare as equal.
/// - Otherwise: round-robin (chunk i → peer i % num_peers). With no peers at all the
///   chunks carry an empty `peer_id`.
///
/// Chunk sizing:
/// - A range starting at height 0 begins with a bootstrap chunk of 128 blocks (or the
///   whole range if it is shorter), so heights 99 and 100 land in the same chunk.
/// - Every other chunk spans `config.chunk_size` blocks; the last one is truncated at
///   `end_height`. A `chunk_size` of 0 is treated as 1, because a zero-sized chunk would
///   never advance and the loop would not terminate.
///
/// Returns an empty vector when `start_height > end_height`. All height arithmetic is
/// overflow-safe, including ranges that end at `u64::MAX`.
pub fn create_chunks(
    config: &ParallelIBDConfig,
    start_height: u64,
    end_height: u64,
    peer_ids: &[String],
    scored_peers: Option<&[(String, f64)]>,
) -> Vec<BlockChunk> {
    let num_peers = peer_ids.len().max(1);

    let earliest_mode = config.mode.eq_ignore_ascii_case("earliest") || config.earliest_first;
    // `max_by` yields `None` for an empty score list, which falls back to round-robin.
    let fastest_peer = if earliest_mode && num_peers > 1 {
        scored_peers.and_then(|scores| {
            scores
                .iter()
                .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
                .map(|(peer, _)| peer.clone())
        })
    } else {
        None
    };

    if fastest_peer.is_some() {
        tracing::info!("IBD: earliest-first — all chunks to fastest peer");
    } else {
        tracing::info!(
            "Round-robin chunk assignment: {} peers, chunk_size={}",
            num_peers,
            config.chunk_size
        );
    }

    // Guard against a zero chunk size: it would make every chunk end before it starts.
    let regular_size = config.chunk_size.max(1);

    let mut chunks = Vec::new();
    let mut current_height = start_height;
    let mut chunk_index: usize = 0;

    while current_height <= end_height {
        // The cursor only moves forward, so it is 0 only for the first chunk of a
        // range that starts at genesis.
        let is_bootstrap = current_height == 0;
        let chunk_sz = if is_bootstrap {
            end_height.saturating_add(1).min(128)
        } else {
            regular_size
        };
        // `chunk_sz >= 1` in both branches, so the subtraction cannot underflow.
        let chunk_end = current_height
            .saturating_add(chunk_sz - 1)
            .min(end_height);
        if is_bootstrap {
            tracing::info!(
                "IBD: bootstrap chunk 0-{} (99 and 100 in same chunk)",
                chunk_end
            );
        }

        let peer_id = match &fastest_peer {
            Some(peer) => peer.clone(),
            None => peer_ids
                .get(chunk_index % num_peers)
                .cloned()
                .unwrap_or_default(),
        };

        chunks.push(BlockChunk {
            start_height: current_height,
            end_height: chunk_end,
            peer_id,
        });

        // Stop at the final chunk explicitly so `chunk_end + 1` cannot overflow when
        // `end_height == u64::MAX`.
        if chunk_end == end_height {
            break;
        }
        current_height = chunk_end + 1;
        chunk_index += 1;
    }

    chunks
}

/// Sequential chunk assigner: assigns chunks in height order so validation never starves.
/// Workers call get_work(peer_id); assigner returns next chunk when start <= validation_height + max_ahead.
/// Bootstrap serialization: when start_height==0, only the chunk starting at 0 is assignable until
/// `mark_bootstrap_complete` is called. This ensures block 0 arrives first — otherwise parallel chunks
/// (128+, 256+) can receive blocks before bootstrap, coordinator never gets block 0, and sync never starts.
///
/// Each chunk is assigned to a specific peer (create_chunks). Unless `work_stealing` is set, a
/// main-queue chunk is only given to a worker whose peer_id matches. create_chunks makes the bootstrap
/// chunk 128 blocks long (shorter only when the whole range is shorter), so 99 and 100 are in the same
/// chunk — no out-of-order delivery regardless of peer type.
///
/// Per-peer serial: at most one chunk in flight per peer. Eliminates chunk-boundary stalls (Core-like
/// earliest-first) — chunks complete in order, validation rarely waits for next block.
pub(crate) struct ChunkAssigner {
    /// Inclusive `(start, end)` height ranges, handed out in index order via `next_index`.
    chunks: Vec<(u64, u64)>,
    /// Peer assigned to each chunk; same length as chunks. Worker gets chunk only if peer matches.
    /// Ignored when `work_stealing=true` (WAN multi-peer mode).
    chunk_peers: Vec<String>,
    /// Index into `chunks` of the next main-queue chunk that has not been handed out yet.
    next_index: AtomicUsize,
    /// Chunks handed back for another attempt (`requeue`, `requeue_chunk_containing_height`).
    /// `get_work` serves these before the main queue.
    retry_queue: Mutex<VecDeque<ChunkWorkItem>>,
    /// Shared height counter read by `get_work` to bound how far ahead chunks may be assigned.
    /// Presumably advanced by the validation side — it is never written in this type.
    validation_height: Arc<std::sync::atomic::AtomicU64>,
    /// Bootstrap gate. While `false`, only chunks whose start equals `start_height` are assignable.
    /// Initialised to `true` when `start_height > 0` (resumed IBD); otherwise set by
    /// `mark_bootstrap_complete`.
    bootstrap_complete: AtomicBool,
    /// Height the sync started from; identifies the bootstrap chunk while the gate is closed.
    start_height: u64,
    /// Per-peer serial: peer_id -> (start, end) of chunk in flight. At most one chunk per peer.
    in_flight_per_peer: Mutex<HashMap<String, (u64, u64)>>,
    /// When true (WAN multi-peer), ignore peer binding: any peer worker takes any available chunk.
    work_stealing: bool,
    /// Peer blacklist: peer_id -> blacklisted_until. Blacklisted peers get no work from get_work().
    blacklisted_until: Mutex<HashMap<String, Instant>>,
}

impl ChunkAssigner {
    /// Build an assigner over `chunks`, where `chunk_peers[i]` is the peer bound to `chunks[i]`.
    ///
    /// `validation_height` is the shared counter `get_work` reads to bound look-ahead.
    /// A `start_height` of 0 enables bootstrap serialization until
    /// [`mark_bootstrap_complete`](Self::mark_bootstrap_complete) is called.
    ///
    /// # Panics
    /// Panics if `chunks` and `chunk_peers` differ in length.
    pub(crate) fn new(
        chunks: Vec<(u64, u64)>,
        chunk_peers: Vec<String>,
        validation_height: Arc<std::sync::atomic::AtomicU64>,
        start_height: u64,
        work_stealing: bool,
    ) -> Self {
        assert_eq!(
            chunks.len(),
            chunk_peers.len(),
            "chunks and chunk_peers must match"
        );
        // Resuming IBD (start_height > 0): no bootstrap serialization, all chunks assignable immediately
        let bootstrap_complete = start_height > 0;
        Self {
            chunks,
            chunk_peers,
            next_index: AtomicUsize::new(0),
            retry_queue: Mutex::new(VecDeque::new()),
            validation_height,
            bootstrap_complete: AtomicBool::new(bootstrap_complete),
            start_height,
            in_flight_per_peer: Mutex::new(HashMap::new()),
            work_stealing,
            blacklisted_until: Mutex::new(HashMap::new()),
        }
    }

    /// Blacklist a peer for `duration`. During this window, `get_work` will not assign it chunks.
    ///
    /// An existing blacklist entry is only ever extended, never shortened.
    pub(crate) fn blacklist_peer(&self, peer_id: &str, duration: Duration) {
        let until = Instant::now() + duration;
        {
            let mut bl = self.blacklisted_until.lock().unwrap();
            bl.entry(peer_id.to_string())
                .and_modify(|existing| {
                    if until > *existing {
                        *existing = until;
                    }
                })
                .or_insert(until);
        }
        tracing::warn!(
            "IBD: blacklisted peer {} for {}s",
            peer_id,
            duration.as_secs()
        );
    }

    /// Returns true if the peer is currently blacklisted. Expired entries are removed on lookup.
    fn is_blacklisted(&self, peer_id: &str) -> bool {
        let mut bl = self.blacklisted_until.lock().unwrap();
        match bl.get(peer_id) {
            Some(until) if Instant::now() < *until => true,
            Some(_) => {
                bl.remove(peer_id);
                false
            }
            None => false,
        }
    }

    /// Mark the bootstrap chunk complete — lifts the bootstrap gate so chunks that do not start at
    /// `start_height` become assignable.
    pub(crate) fn mark_bootstrap_complete(&self) {
        self.bootstrap_complete.store(true, Ordering::Relaxed);
    }

    /// Returns the next assignable chunk for this peer, or None if nothing ready.
    ///
    /// Returns `None` when the peer is blacklisted, already has a chunk in flight (per-peer
    /// serial), or no chunk is currently eligible. On success the chunk is recorded as in flight
    /// for `peer_id` until `on_chunk_complete` is called.
    ///
    /// Selection order:
    /// 1. Retry queue — the chunk containing the next needed height (`validation_height + 1`),
    ///    otherwise the retry chunk with the lowest start. Entries whose `exclude_peer` equals
    ///    `peer_id` are skipped.
    /// 2. Main queue — the chunk at `next_index`, subject to peer binding (unless
    ///    `work_stealing`), the `max_ahead` window, and the bootstrap gate.
    ///
    /// CRITICAL: the in-flight check, chunk selection and in-flight insert all happen under the
    /// `in_flight_per_peer` lock to prevent duplicate chunk assignment (race: two workers for the
    /// same peer both getting the same chunk, both requesting the same blocks, one starves).
    /// Lock order is `in_flight_per_peer` then `retry_queue`.
    pub(crate) fn get_work(&self, peer_id: &str, max_ahead: u64) -> Option<(u64, u64)> {
        let bootstrap_done = self.bootstrap_complete.load(Ordering::Relaxed);
        let current_validation = self.validation_height.load(Ordering::Relaxed);
        let next_needed = current_validation.saturating_add(1);
        let max_start = current_validation.saturating_add(max_ahead);

        // Bootstrap serialization: until the bootstrap chunk completes, only assign the chunk
        // that starts at start_height.
        let allow_chunk = |start: u64| bootstrap_done || start == self.start_height;
        // A retry entry is usable by this peer if it is not excluded for it and passes the gate.
        // `as_deref` compares against `peer_id` without allocating a String per entry.
        let usable = |item: &(u64, u64, Option<String>)| {
            item.2.as_deref() != Some(peer_id) && allow_chunk(item.0)
        };

        // Blacklisted peers get no work until their cooldown expires.
        if self.is_blacklisted(peer_id) {
            return None;
        }

        // Single lock: check in-flight + find chunk + insert. Prevents duplicate assignment.
        let mut guard = self.in_flight_per_peer.lock().unwrap();
        if guard.contains_key(peer_id) {
            return None;
        }

        // Try retry queue first (critical chunk, then earliest).
        //
        // IMPORTANT: retry-queue chunks are NOT filtered by max_start. These are stall-recovery
        // chunks — the coordinator explicitly decided they're needed to unblock progress. Applying
        // the max_ahead window to retry chunks causes a deadlock when the missing chunk starts just
        // past max_start: validation stalls (can't advance), max_start can't grow (validation stuck),
        // and the chunk can never be taken (max_start check fails). The retry_queue is always small
        // (0–1 entries in practice), so skipping the window check here poses no memory risk.
        {
            let mut retry = self.retry_queue.lock().unwrap();
            let critical = retry
                .iter()
                .position(|item| item.0 <= next_needed && next_needed <= item.1 && usable(item));
            let picked = critical.or_else(|| {
                retry
                    .iter()
                    .enumerate()
                    .filter(|(_, item)| usable(*item))
                    .min_by_key(|(_, item)| item.0)
                    .map(|(i, _)| i)
            });
            if let Some((start, end, _)) = picked.and_then(|i| retry.remove(i)) {
                guard.insert(peer_id.to_string(), (start, end));
                return Some((start, end));
            }
        }

        // Main queue — try the next sequential chunk.
        //
        // Peer binding: enforced for LAN/single-peer modes so a fast LAN peer isn't displaced by
        // slow WAN peers stealing its pre-assigned chunks. For WAN multi-peer (work_stealing=true),
        // binding is skipped — any free peer takes the next available chunk, giving us work-stealing
        // semantics that maximize throughput when peers have heterogeneous speeds.
        let idx = self.next_index.load(Ordering::Relaxed);
        let &(start, end) = self.chunks.get(idx)?;
        // Peer binding check: skip in work_stealing mode (WAN multi-peer).
        let bound_elsewhere = self
            .chunk_peers
            .get(idx)
            .map_or(false, |owner| owner.as_str() != peer_id);
        if !self.work_stealing && bound_elsewhere {
            return None;
        }
        if start > max_start || !allow_chunk(start) {
            return None;
        }
        // Safe without compare-and-swap: next_index is only advanced while the in-flight lock
        // is held.
        self.next_index.store(idx + 1, Ordering::Relaxed);
        guard.insert(peer_id.to_string(), (start, end));
        Some((start, end))
    }

    /// Called when a worker completes (or fails) a chunk. Clears in-flight so peer can get next chunk.
    pub(crate) fn on_chunk_complete(&self, peer_id: &str) {
        self.in_flight_per_peer.lock().unwrap().remove(peer_id);
    }

    /// Put chunk `start..=end` on the retry queue. `get_work` will not hand it to `exclude_peer`
    /// (if any), which avoids an immediate retry with the same peer. No de-duplication is done here.
    pub(crate) fn requeue(&self, start: u64, end: u64, exclude_peer: Option<String>) {
        self.retry_queue
            .lock()
            .unwrap()
            .push_back((start, end, exclude_peer));
    }

    /// When validation/coordinator stalls on a missing height, workers may have no in-flight chunk
    /// covering that height (chunk was already marked complete after a bad download). Re-queue the
    /// static chunk that contains `height` so a worker can re-fetch it. Idempotent if already queued.
    ///
    /// The chunk is queued with no excluded peer. A height outside every chunk is logged and ignored.
    pub(crate) fn requeue_chunk_containing_height(&self, height: u64) {
        let Some(&(start, end)) = self
            .chunks
            .iter()
            .find(|(s, e)| (*s..=*e).contains(&height))
        else {
            tracing::warn!(
                "stall recovery: height {} not in any assigner chunk (chunks={})",
                height,
                self.chunks.len()
            );
            return;
        };
        let mut rq = self.retry_queue.lock().unwrap();
        if rq.iter().any(|(s, e, _)| *s == start && *e == end) {
            return;
        }
        rq.push_back((start, end, None));
        tracing::warn!(
            "stall recovery: requeued chunk {}-{} for missing height {}",
            start,
            end,
            height
        );
    }

    /// True when every main-queue chunk has been handed out and the retry queue is empty.
    /// Chunks still in flight are not considered.
    pub(crate) fn is_done(&self) -> bool {
        let idx = self.next_index.load(Ordering::Relaxed);
        idx >= self.chunks.len() && self.retry_queue.lock().unwrap().is_empty()
    }

    /// Number of chunks this assigner was created with.
    pub(crate) fn total_chunks(&self) -> usize {
        self.chunks.len()
    }

    /// Chunks not yet handed out: unassigned main-queue chunks plus retry-queue entries.
    pub(crate) fn remaining_count(&self) -> usize {
        let idx = self.next_index.load(Ordering::Relaxed);
        let retry_len = self.retry_queue.lock().unwrap().len();
        self.chunks.len().saturating_sub(idx) + retry_len
    }
}

/// Re-queues chunk on drop if not disarmed. Prevents chunk loss on panic/task-cancel/any exit.
///
/// Dropping an armed guard also clears the peer's in-flight slot via `on_chunk_complete`.
pub(crate) struct ChunkGuard {
    /// Chunk to hand back to the assigner on drop; `None` once disarmed.
    chunk: Option<ChunkWorkItem>,
    /// Peer whose in-flight slot is released on drop; `None` once disarmed.
    peer_id: Option<String>,
    /// Assigner that receives the requeue / completion calls.
    assigner: Arc<ChunkAssigner>,
}

impl ChunkGuard {
    /// Arm a guard for chunk `start..=end` held by `peer_id`.
    ///
    /// `exclude` is stored with the chunk and passed back to the assigner's `requeue` if the
    /// guard is dropped while still armed.
    pub(crate) fn new(
        start: u64,
        end: u64,
        exclude: Option<String>,
        peer_id: String,
        assigner: Arc<ChunkAssigner>,
    ) -> Self {
        let chunk = Some((start, end, exclude));
        let peer_id = Some(peer_id);
        Self {
            chunk,
            peer_id,
            assigner,
        }
    }

    /// Disarm the guard so that dropping it does nothing.
    ///
    /// The peer is cleared as well: Drop will not call `on_chunk_complete`, the caller is
    /// expected to do that itself.
    pub(crate) fn disarm(&mut self) {
        self.chunk.take();
        self.peer_id.take();
    }
}

impl Drop for ChunkGuard {
    /// Hand an armed chunk back to the assigner, then release the peer's in-flight slot.
    ///
    /// Both steps are skipped for whichever field `disarm` has already cleared.
    fn drop(&mut self) {
        let pending = self.chunk.take();
        let holder = self.peer_id.take();

        // Requeue first so the chunk is already back on the retry queue by the time the peer
        // becomes eligible for new work.
        if let Some((start, end, exclude)) = pending {
            self.assigner.requeue(start, end, exclude);
        }
        if let Some(peer) = holder {
            self.assigner.on_chunk_complete(&peer);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::AtomicU64;

    /// Build an assigner whose chunks are bound to `peers` round-robin and whose
    /// validation height starts at 0.
    fn assigner_for_heights(
        chunks: &[(u64, u64)],
        peers: &[&str],
        start_height: u64,
        work_stealing: bool,
    ) -> ChunkAssigner {
        let chunk_peers: Vec<String> = chunks
            .iter()
            .enumerate()
            .map(|(i, _)| peers[i % peers.len()].to_string())
            .collect();
        ChunkAssigner::new(
            chunks.to_vec(),
            chunk_peers,
            Arc::new(AtomicU64::new(0)),
            start_height,
            work_stealing,
        )
    }

    #[test]
    fn get_work_assigns_sequential_chunks_per_peer() {
        let chunks = vec![(200, 263), (264, 327)];
        let assigner = assigner_for_heights(&chunks, &["p1", "p2"], 200, false);
        let w0 = assigner.get_work("p1", 1000).expect("chunk 0");
        assert_eq!(w0, (200, 263));
        assert!(
            assigner.get_work("p1", 1000).is_none(),
            "one in flight per peer"
        );
        assigner.on_chunk_complete("p1");
        let w1 = assigner.get_work("p2", 1000).expect("chunk 1");
        assert_eq!(w1, (264, 327));
    }

    #[test]
    fn bootstrap_serializes_until_marked_complete() {
        let chunks = vec![(0, 127), (128, 255)];
        let assigner = assigner_for_heights(&chunks, &["p1"], 0, false);
        assert_eq!(assigner.get_work("p1", 1000), Some((0, 127)));
        assigner.on_chunk_complete("p1");
        assert!(
            assigner.get_work("p1", 1000).is_none(),
            "second chunk blocked until bootstrap done"
        );
        assigner.mark_bootstrap_complete();
        assert_eq!(assigner.get_work("p1", 1000), Some((128, 255)));
    }

    #[test]
    fn requeue_chunk_containing_height_is_idempotent() {
        let chunks = vec![(100, 199)];
        let assigner = assigner_for_heights(&chunks, &["p1"], 100, false);
        assigner.requeue_chunk_containing_height(150);
        assigner.requeue_chunk_containing_height(150);
        // One unassigned main-queue chunk plus exactly one retry entry (not two).
        assert_eq!(assigner.remaining_count(), 2);
    }

    #[test]
    fn blacklist_blocks_peer_until_expired() {
        let chunks = vec![(0, 63)];
        let assigner = assigner_for_heights(&chunks, &["p1"], 0, false);
        assigner.blacklist_peer("p1", Duration::from_secs(3600));
        assert!(assigner.get_work("p1", 1000).is_none());

        // A zero-length blacklist has already expired by the time get_work checks it.
        let expired = assigner_for_heights(&chunks, &["p1"], 0, false);
        expired.blacklist_peer("p1", Duration::from_secs(0));
        assert_eq!(expired.get_work("p1", 1000), Some((0, 63)));
    }

    #[test]
    fn retry_queue_skips_excluded_peer() {
        let chunks = vec![(100, 199), (200, 299)];
        let assigner = assigner_for_heights(&chunks, &["p1", "p2"], 100, false);
        assert_eq!(assigner.get_work("p1", 1000), Some((100, 199)));
        assigner.on_chunk_complete("p1");
        assigner.requeue(100, 199, Some("p1".to_string()));
        // p1 is excluded from the retry entry and the next main-queue chunk is bound to p2.
        assert!(assigner.get_work("p1", 1000).is_none());
        // p2 picks up the retry entry before its own main-queue chunk.
        assert_eq!(assigner.get_work("p2", 1000), Some((100, 199)));
    }

    #[test]
    fn work_stealing_ignores_peer_binding() {
        let chunks = vec![(0, 63)];
        let assigner = assigner_for_heights(&chunks, &["p1"], 0, true);
        assert_eq!(assigner.get_work("other-peer", 1000), Some((0, 63)));
    }

    #[test]
    fn chunk_guard_requeues_on_drop() {
        let chunks = vec![(0, 63)];
        let assigner = Arc::new(assigner_for_heights(&chunks, &["p1"], 0, false));
        let work = assigner.get_work("p1", 1000).unwrap();
        {
            let _guard = ChunkGuard::new(work.0, work.1, None, "p1".into(), Arc::clone(&assigner));
        }
        // The main queue is exhausted; the single remaining item is the requeued chunk.
        assert_eq!(assigner.remaining_count(), 1);
    }
}