aletheiadb 0.1.0

//! Concurrent Write-Ahead Log implementation.
//!
//! This module provides [`ConcurrentWal`], a high-performance WAL that supports
//! concurrent appends from multiple threads with minimal contention.
//!
//! # Architecture
//!
//! ```text
//!                    ┌─────────────────────┐
//!                    │    LSN Allocator    │
//!                    │ AtomicU64::fetch_add│
//!                    └──────────┬──────────┘
//!                               │
//!       ┌───────────────────────┼───────────────────────┐
//!       ▼                       ▼                       ▼
//! ┌─────────────┐         ┌─────────────┐         ┌─────────────┐
//! │   Stripe 0  │         │   Stripe 1  │         │  Stripe N   │
//! │ Ring Buffer │         │ Ring Buffer │         │ Ring Buffer │
//! └─────────────┘         └─────────────┘         └─────────────┘
//!       └───────────────────────┼───────────────────────┘
//!                               ▼
//!                    ┌─────────────────────┐
//!                    │  Flush Coordinator  │
//!                    │  - Collects stripes │
//!                    │  - Sorts by LSN     │
//!                    │  - Writes segment   │
//!                    └─────────────────────┘
//! ```
//!
//! # Thread Safety
//!
//! - Multiple threads can call `append*` methods concurrently
//! - Writers are assigned to stripes via thread-local affinity
//! - The flush coordinator drains all stripes and writes to disk
//!
//! # Performance
//!
//! - **Append latency**: ~50-100ns (lock-free)
//! - **Throughput**: 500K+ entries/sec with 16+ stripes
//! - **Scalability**: Linear up to 64 concurrent writers
//!
//! # Buffer Exhaustion / Backpressure
//!
//! When all stripes fill up faster than the flush coordinator can drain them:
//!
//! 1. **Non-blocking append (`try_append`)**: Returns `Err(entry)` immediately
//!    after exponential spin backoff. The caller can retry, drop, or queue the
//!    entry externally.
//!
//! 2. **Blocking append (`append_blocking`)**: Spins briefly, then sleeps with
//!    exponential backoff (1µs → 2µs → 4µs → ... → 1ms) until space is available.
//!    This provides automatic backpressure but may block the calling thread.
//!
//! 3. **Async append (`append_async`)**: Uses `append_blocking` internally, so
//!    it will block until space is available. This is intentional - async here
//!    means "no durability wait", not "non-blocking".
//!
//! **Sizing guidance**: With default settings (16 stripes × 1024 capacity), the
//! WAL can buffer 16,384 entries. At 500K entries/sec with 10ms flush interval,
//! ~5,000 entries accumulate per interval. The default sizing provides ~3x
//! headroom for burst traffic.
//!
//! **Monitoring**: Use `stripe_metrics()` to detect high buffer utilization.
//! If a stripe's `pending_count` consistently exceeds 50% of `stripe_capacity`, consider:
//! - Increasing `stripe_capacity`
//! - Increasing `num_stripes`
//! - Reducing flush interval

use std::cell::Cell;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};

use super::lsn_allocator::LsnAllocator;
use super::ring_buffer::{CompletionHandle, PendingEntry};
use super::stripe::{StripeMetrics, WalStripe};
use super::{LSN, WalOperation};

use crate::core::error::{Error, Result, StorageError};

/// Default number of stripes (should be power of 2).
pub const DEFAULT_NUM_STRIPES: usize = 16;

/// Default ring buffer capacity per stripe.
pub const DEFAULT_STRIPE_CAPACITY: usize = 1024;

/// Configuration for the concurrent WAL.
///
/// Start from [`ConcurrentWalConfig::new`] (or `Default`) and refine the
/// result with the `with_*` builder methods. All fields are public, so they
/// can also be set directly.
#[derive(Debug, Clone)]
pub struct ConcurrentWalConfig {
    /// WAL directory path.
    pub wal_dir: PathBuf,
    /// Number of stripes (should be power of 2 for efficient modulo).
    pub num_stripes: usize,
    /// Ring buffer capacity per stripe.
    pub stripe_capacity: usize,
    /// Maximum segment size in bytes before rotation.
    pub segment_size: usize,
    /// Number of segments to retain.
    pub segments_to_retain: usize,
}

impl Default for ConcurrentWalConfig {
    /// Defaults: directory `data/wal`, [`DEFAULT_NUM_STRIPES`] stripes of
    /// [`DEFAULT_STRIPE_CAPACITY`] entries, 64 MB segments, 10 retained segments.
    fn default() -> Self {
        Self {
            wal_dir: PathBuf::from("data/wal"),
            num_stripes: DEFAULT_NUM_STRIPES,
            stripe_capacity: DEFAULT_STRIPE_CAPACITY,
            segment_size: 64 * 1024 * 1024, // 64 MB
            segments_to_retain: 10,
        }
    }
}

impl ConcurrentWalConfig {
    /// Create a new config with the specified WAL directory and default
    /// values for every other field.
    pub fn new(wal_dir: impl Into<PathBuf>) -> Self {
        Self {
            wal_dir: wal_dir.into(),
            ..Default::default()
        }
    }

    /// Set the number of stripes.
    ///
    /// The value is rounded up to the next power of two (so `10` becomes
    /// `16` and `0` becomes `1`).
    #[must_use]
    pub fn with_num_stripes(mut self, num_stripes: usize) -> Self {
        self.num_stripes = num_stripes.next_power_of_two();
        self
    }

    /// Set the ring buffer capacity of each stripe. The value is stored as given.
    #[must_use]
    pub fn with_stripe_capacity(mut self, capacity: usize) -> Self {
        self.stripe_capacity = capacity;
        self
    }

    /// Set the maximum segment size in bytes. The value is stored as given.
    #[must_use]
    pub fn with_segment_size(mut self, size: usize) -> Self {
        self.segment_size = size;
        self
    }

    /// Set the number of segments to retain. The value is stored as given.
    ///
    /// Provided so that every tunable field has a matching builder method.
    #[must_use]
    pub fn with_segments_to_retain(mut self, count: usize) -> Self {
        self.segments_to_retain = count;
        self
    }
}

// Per-thread cache of the hash of the current thread's ID, filled lazily by
// `ConcurrentWal::get_stripe`. The cached value is the raw hash, not a stripe
// index: each WAL masks it with its own `stripe_mask`, so one thread can map
// to different stripe indices in WALs with different stripe counts.
thread_local! {
    static THREAD_ID_HASH: Cell<Option<u64>> = const { Cell::new(None) };
}

/// Concurrent Write-Ahead Log with striped architecture.
///
/// Provides high-throughput, low-latency WAL operations by distributing
/// writes across multiple stripes with lock-free ring buffers.
///
/// The append methods in this file only buffer serialized entries in a
/// stripe; a flush coordinator is expected to collect them through
/// [`ConcurrentWal::drain_all`] or [`ConcurrentWal::drain_stripe`].
pub struct ConcurrentWal {
    /// Configuration, stored exactly as passed to `new`.
    config: ConcurrentWalConfig,
    /// Global LSN allocator.
    lsn_allocator: LsnAllocator,
    /// Striped append buffers.
    stripes: Vec<WalStripe>,
    /// Number of stripes actually created (`config.num_stripes` rounded up
    /// to a power of two).
    num_stripes: usize,
    /// Stripe mask for fast modulo (num_stripes - 1).
    stripe_mask: usize,
    /// Total entries appended across all stripes.
    total_appends: AtomicU64,
    /// Flag indicating if shutdown has been requested.
    shutdown_requested: AtomicBool,
    /// Number of append calls currently in flight. Despite the name this
    /// covers single appends as well as batches: every `append*` method
    /// holds an `ActiveBatchGuard` on this counter while it runs.
    active_batches: AtomicUsize,
}

/// Scope guard that marks one append operation as in flight.
///
/// Creating the guard increments the wrapped counter; dropping it (on any
/// exit path, including `?` early returns) decrements the counter again.
struct ActiveBatchGuard<'a>(&'a AtomicUsize);

impl<'a> ActiveBatchGuard<'a> {
    /// Registers one more active operation on `counter` and returns the
    /// guard that will deregister it when dropped.
    fn new(counter: &'a AtomicUsize) -> Self {
        let guard = Self(counter);
        guard.0.fetch_add(1, Ordering::SeqCst);
        guard
    }
}

impl Drop for ActiveBatchGuard<'_> {
    /// Deregisters the operation that this guard was tracking.
    fn drop(&mut self) {
        let Self(counter) = self;
        counter.fetch_sub(1, Ordering::SeqCst);
    }
}

impl ConcurrentWal {
    /// Build a concurrent WAL from `config`.
    ///
    /// The number of stripes created is `config.num_stripes` rounded up to
    /// the next power of two, so stripe selection can use a bit mask. Each
    /// stripe gets `config.stripe_capacity` as its capacity. The config is
    /// stored unchanged, so `config().num_stripes` may differ from
    /// `num_stripes()` when the configured value was not a power of two.
    ///
    /// The body as written always returns `Ok`.
    pub fn new(config: ConcurrentWalConfig) -> Result<Self> {
        let num_stripes = config.num_stripes.next_power_of_two();
        let capacity = config.stripe_capacity;

        // One stripe per slot, identified by its index.
        let mut stripes: Vec<WalStripe> = Vec::with_capacity(num_stripes);
        for id in 0..num_stripes {
            stripes.push(WalStripe::with_capacity(id, capacity));
        }

        Ok(Self {
            stripe_mask: num_stripes - 1,
            num_stripes,
            stripes,
            lsn_allocator: LsnAllocator::new(),
            total_appends: AtomicU64::new(0),
            shutdown_requested: AtomicBool::new(false),
            active_batches: AtomicUsize::new(0),
            config,
        })
    }

    /// Build a concurrent WAL rooted at `wal_dir`, using the default value
    /// for every other configuration field.
    pub fn with_defaults(wal_dir: impl Into<PathBuf>) -> Result<Self> {
        let config = ConcurrentWalConfig::new(wal_dir);
        Self::new(config)
    }

    /// Get the current (next to be allocated) LSN.
    ///
    /// This only reads the allocator via `LsnAllocator::current`; it does
    /// not reserve the returned LSN.
    #[inline]
    pub fn current_lsn(&self) -> LSN {
        self.lsn_allocator.current()
    }

    /// Get the number of stripes actually created.
    ///
    /// This is the power-of-two value computed in `new`, which can be larger
    /// than the `num_stripes` stored in the configuration.
    #[inline]
    pub fn num_stripes(&self) -> usize {
        self.num_stripes
    }

    /// Get the total number of entries successfully handed to a stripe.
    ///
    /// The counter is read with `Ordering::Relaxed`, so under concurrent
    /// appends the value is a statistic rather than a synchronization point.
    #[inline]
    pub fn total_appends(&self) -> u64 {
        self.total_appends.load(Ordering::Relaxed)
    }

    /// Pick the stripe that the calling thread appends to.
    ///
    /// The thread's ID is hashed once with `DefaultHasher` and the hash is
    /// cached in the `THREAD_ID_HASH` thread-local; later calls reuse the
    /// cached value. The stripe index is the hash masked with `stripe_mask`,
    /// so a given thread always maps to the same stripe of a given WAL.
    #[inline]
    fn get_stripe(&self) -> &WalStripe {
        use std::hash::{Hash, Hasher};

        let thread_hash = THREAD_ID_HASH.with(|cached| {
            cached.get().unwrap_or_else(|| {
                // First call on this thread: hash the thread ID and remember it.
                let mut hasher = std::collections::hash_map::DefaultHasher::new();
                std::thread::current().id().hash(&mut hasher);
                let fresh = hasher.finish();
                cached.set(Some(fresh));
                fresh
            })
        });

        // `stripe_mask` is `num_stripes - 1` with a power-of-two stripe count,
        // so the masked value is always a valid index.
        let index = (thread_hash as usize) & self.stripe_mask;
        &self.stripes[index]
    }

    /// Get a specific stripe by ID.
    ///
    /// Returns `None` when `id` is not a valid index into the stripe list.
    #[inline]
    pub fn stripe(&self, id: usize) -> Option<&WalStripe> {
        self.stripes.get(id)
    }

    /// Check if the WAL is shutting down and return an error if so.
    #[inline]
    fn check_not_shutting_down(&self) -> Result<()> {
        if self.shutdown_requested.load(Ordering::SeqCst) {
            return Err(Error::Storage(StorageError::WalError {
                reason: "WAL is shutting down".to_string(),
            }));
        }
        Ok(())
    }

    /// Append an operation (async mode - returns immediately after buffering).
    ///
    /// The entry is serialized and buffered in the calling thread's stripe;
    /// it is expected to be flushed to disk later by the flush coordinator.
    ///
    /// Note: "async" here means no durability wait (not non-blocking).
    /// This method will block if the buffer is full until space becomes
    /// available (backpressure), using exponential backoff.
    ///
    /// # Returns
    ///
    /// The allocated LSN for this entry.
    ///
    /// # Errors
    ///
    /// - The WAL is shutting down.
    /// - Serialization fails (for example the entry exceeds the size limit).
    ///   The LSN has already been allocated at that point and is not reused.
    /// - The stripe rejects the entry, reported as "WAL buffer closed".
    pub fn append_async(&self, operation: WalOperation) -> Result<LSN> {
        // Register as in flight BEFORE looking at the shutdown flag. With the
        // opposite order, `shutdown_graceful` could set the flag and observe
        // zero active operations in the gap between our check and our
        // registration, then close the stripes while this append is still
        // about to run. Both sides use SeqCst, so either we see the flag and
        // bail out, or shutdown sees our registration and waits for us.
        let _guard = ActiveBatchGuard::new(&self.active_batches);
        self.check_not_shutting_down()?;

        let lsn = self.lsn_allocator.allocate();
        let data = self.serialize_entry(lsn, &operation)?;
        let stripe = self.get_stripe();

        match stripe.append_blocking(lsn, data) {
            Ok(()) => {
                self.total_appends.fetch_add(1, Ordering::Relaxed);
                Ok(lsn)
            }
            Err(_entry) => Err(Error::Storage(StorageError::WalError {
                reason: "WAL buffer closed".to_string(),
            })),
        }
    }

    /// Append an operation (sync mode - waits for durability).
    ///
    /// The entry is buffered and the caller blocks on the returned
    /// completion handle until the entry has been flushed.
    ///
    /// NOTE(review): unlike `append_with_handle`, this calls the stripe's
    /// `append_sync` rather than `append_sync_blocking`. The error text below
    /// suggests it fails instead of waiting when the buffer is full; confirm
    /// that this is intended.
    ///
    /// # Returns
    ///
    /// - `Ok(lsn)` - The entry is now durable
    /// - `Err(...)` - The WAL is shutting down, serialization failed, the
    ///   stripe rejected the entry ("WAL buffer full - backpressure"), or the
    ///   flush failed
    pub fn append_sync(&self, operation: WalOperation) -> Result<LSN> {
        // Register as in flight BEFORE looking at the shutdown flag. With the
        // opposite order, `shutdown_graceful` could set the flag and observe
        // zero active operations in the gap between our check and our
        // registration, then close the stripes while this append is still
        // about to run. Both sides use SeqCst, so either we see the flag and
        // bail out, or shutdown sees our registration and waits for us.
        let _guard = ActiveBatchGuard::new(&self.active_batches);
        self.check_not_shutting_down()?;

        let lsn = self.lsn_allocator.allocate();
        let data = self.serialize_entry(lsn, &operation)?;
        let stripe = self.get_stripe();

        match stripe.append_sync(lsn, data) {
            Ok(handle) => {
                // Counted as appended once buffered, even if the flush later fails.
                self.total_appends.fetch_add(1, Ordering::Relaxed);
                // Wait for flush
                handle.wait().map_err(|e| {
                    Error::Storage(StorageError::WalError {
                        reason: format!("WAL flush failed: {}", e),
                    })
                })?;
                Ok(lsn)
            }
            Err(_entry) => Err(Error::Storage(StorageError::WalError {
                reason: "WAL buffer full - backpressure".to_string(),
            })),
        }
    }

    /// Append an operation with a completion handle (for group commit).
    ///
    /// Returns the allocated LSN together with a handle that can be used to
    /// wait for durability later. This method will block if the buffer is
    /// full until space becomes available (backpressure).
    ///
    /// # Errors
    ///
    /// - The WAL is shutting down.
    /// - Serialization fails; the LSN has already been allocated at that
    ///   point and is not reused.
    /// - The stripe rejects the entry, reported as "WAL buffer closed".
    pub fn append_with_handle(&self, operation: WalOperation) -> Result<(LSN, CompletionHandle)> {
        // Register as in flight BEFORE looking at the shutdown flag. With the
        // opposite order, `shutdown_graceful` could set the flag and observe
        // zero active operations in the gap between our check and our
        // registration, then close the stripes while this append is still
        // about to run. Both sides use SeqCst, so either we see the flag and
        // bail out, or shutdown sees our registration and waits for us.
        let _guard = ActiveBatchGuard::new(&self.active_batches);
        self.check_not_shutting_down()?;

        let lsn = self.lsn_allocator.allocate();
        let data = self.serialize_entry(lsn, &operation)?;
        let stripe = self.get_stripe();

        match stripe.append_sync_blocking(lsn, data) {
            Ok(handle) => {
                self.total_appends.fetch_add(1, Ordering::Relaxed);
                Ok((lsn, handle))
            }
            Err(_entry) => Err(Error::Storage(StorageError::WalError {
                reason: "WAL buffer closed".to_string(),
            })),
        }
    }

    /// Append a batch of operations efficiently (async mode - no durability wait).
    ///
    /// All LSNs for the batch are reserved with a single `allocate_batch`
    /// call (instead of one allocation per operation), so the entries get
    /// consecutive LSNs in input order. Each operation is then serialized
    /// and pushed onto the calling thread's stripe with `append_blocking`.
    ///
    /// # Arguments
    ///
    /// * `operations` - Vector of operations to append
    ///
    /// # Returns
    ///
    /// Vector of allocated LSNs in the same order as the operations.
    /// Returns an empty vector (and allocates no LSN) if `operations` is empty.
    ///
    /// # Errors
    ///
    /// - The WAL is shutting down.
    /// - Serializing an operation fails.
    /// - The stripe rejects an entry, reported as "WAL buffer closed".
    ///
    /// The batch is not atomic: when an error occurs part-way through,
    /// entries appended before the failure stay buffered and the LSNs
    /// reserved for the remaining operations are not reclaimed.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let ops = vec![
    ///     WalOperation::CreateNode { /* ... */ },
    ///     WalOperation::CreateEdge { /* ... */ },
    ///     WalOperation::UpdateNode { /* ... */ },
    /// ];
    ///
    /// let lsns = wal.append_batch(ops)?;
    /// assert_eq!(lsns.len(), 3);
    /// ```
    pub fn append_batch(&self, operations: Vec<WalOperation>) -> Result<Vec<LSN>> {
        // Register as in flight BEFORE looking at the shutdown flag. With the
        // opposite order, `shutdown_graceful` could set the flag and observe
        // zero active operations in the gap between our check and our
        // registration, then close the stripes in the middle of this batch.
        // Both sides use SeqCst, so either we see the flag and bail out, or
        // shutdown sees our registration and waits for the batch to finish.
        let _guard = ActiveBatchGuard::new(&self.active_batches);
        self.check_not_shutting_down()?;

        // Handle empty batch early
        if operations.is_empty() {
            return Ok(Vec::new());
        }

        let count = operations.len() as u64;

        // Defensive check: ensure count > 0 to prevent panic in allocate_batch
        debug_assert!(count > 0, "count should be > 0 after empty check");

        // Allocate all LSNs in a single atomic operation
        let (first_lsn, _last_lsn) = self.lsn_allocator.allocate_batch(count);

        // The stripe depends only on the calling thread, so resolve it once
        // for the whole batch instead of once per entry.
        let stripe = self.get_stripe();

        // Pre-allocate result vector
        let mut lsns = Vec::with_capacity(operations.len());

        // Serialize and append each operation individually since each entry has its own LSN.
        for (idx, operation) in operations.into_iter().enumerate() {
            let lsn = LSN(first_lsn.0 + idx as u64);
            lsns.push(lsn);

            let data = self.serialize_entry(lsn, &operation)?;

            match stripe.append_blocking(lsn, data) {
                Ok(()) => {
                    self.total_appends.fetch_add(1, Ordering::Relaxed);
                }
                Err(_entry) => {
                    return Err(Error::Storage(StorageError::WalError {
                        reason: "WAL buffer closed".to_string(),
                    }));
                }
            }
        }

        Ok(lsns)
    }

    /// Append a batch of operations efficiently (sync mode - returns handles to wait on).
    ///
    /// This method mirrors `append_batch` but pushes each entry with
    /// `append_sync_blocking` and returns a completion handle per operation,
    /// allowing the caller to wait for durability.
    ///
    /// # Returns
    ///
    /// A tuple containing:
    /// - Vector of allocated LSNs
    /// - Vector of completion handles corresponding to each operation
    ///
    /// Both vectors are empty (and no LSN is allocated) for an empty input.
    ///
    /// # Errors
    ///
    /// - The WAL is shutting down.
    /// - Serializing an operation fails.
    /// - The stripe rejects an entry, reported as "WAL buffer closed".
    ///
    /// The batch is not atomic: when an error occurs part-way through,
    /// entries appended before the failure stay buffered, their handles are
    /// dropped with the partial result, and the LSNs reserved for the
    /// remaining operations are not reclaimed.
    pub fn append_batch_with_handles(
        &self,
        operations: Vec<WalOperation>,
    ) -> Result<(Vec<LSN>, Vec<CompletionHandle>)> {
        // Register as in flight BEFORE looking at the shutdown flag. With the
        // opposite order, `shutdown_graceful` could set the flag and observe
        // zero active operations in the gap between our check and our
        // registration, then close the stripes in the middle of this batch.
        // Both sides use SeqCst, so either we see the flag and bail out, or
        // shutdown sees our registration and waits for the batch to finish.
        let _guard = ActiveBatchGuard::new(&self.active_batches);
        self.check_not_shutting_down()?;

        // Handle empty batch early
        if operations.is_empty() {
            return Ok((Vec::new(), Vec::new()));
        }

        let count = operations.len() as u64;

        // Defensive check: ensure count > 0 to prevent panic in allocate_batch
        debug_assert!(count > 0, "count should be > 0 after empty check");

        // Allocate all LSNs in a single atomic operation
        let (first_lsn, _last_lsn) = self.lsn_allocator.allocate_batch(count);

        // The stripe depends only on the calling thread, so resolve it once
        // for the whole batch instead of once per entry.
        let stripe = self.get_stripe();

        // Pre-allocate result vectors
        let mut lsns = Vec::with_capacity(operations.len());
        let mut handles = Vec::with_capacity(operations.len());

        // Serialize and append each operation individually since each entry has its own LSN.
        for (idx, operation) in operations.into_iter().enumerate() {
            let lsn = LSN(first_lsn.0 + idx as u64);
            lsns.push(lsn);

            let data = self.serialize_entry(lsn, &operation)?;

            match stripe.append_sync_blocking(lsn, data) {
                Ok(handle) => {
                    self.total_appends.fetch_add(1, Ordering::Relaxed);
                    handles.push(handle);
                }
                Err(_entry) => {
                    return Err(Error::Storage(StorageError::WalError {
                        reason: "WAL buffer closed".to_string(),
                    }));
                }
            }
        }

        Ok((lsns, handles))
    }

    /// Serialize one WAL entry (LSN, current timestamp, operation) to bytes.
    ///
    /// The output buffer is pre-sized from `estimate_entry_capacity`, and the
    /// operation is written straight into it by reference, without building
    /// an intermediate entry value.
    ///
    /// # Errors
    ///
    /// - `StorageError::CapacityExceeded` when the *estimated* size is larger
    ///   than `MAX_WAL_ENTRY_SIZE`; nothing is allocated in that case.
    /// - Any error returned by `serialize_operation_into`.
    fn serialize_entry(&self, lsn: LSN, operation: &WalOperation) -> Result<Vec<u8>> {
        let limit = super::entry::MAX_WAL_ENTRY_SIZE;
        let size_hint = super::estimate_entry_capacity(operation);

        // Reject oversized entries before allocating a buffer for them.
        if size_hint > limit {
            return Err(Error::Storage(StorageError::CapacityExceeded {
                resource: "WAL entry size".to_string(),
                current: size_hint,
                limit,
            }));
        }

        let timestamp = crate::core::temporal::time::now();
        let mut bytes = Vec::with_capacity(size_hint);
        super::serialization::serialize_operation_into(lsn, timestamp, operation, &mut bytes)?;

        Ok(bytes)
    }

    /// Collect the pending entries of every stripe into one LSN-ordered list.
    ///
    /// Intended for the flush coordinator. Stripes are drained one after the
    /// other in index order, and the combined list is then sorted by LSN so
    /// that entries can be written in global order.
    pub fn drain_all(&self) -> Vec<PendingEntry> {
        // Size the output from a snapshot of the pending counts; stripes may
        // still receive entries afterwards, so this is only a capacity hint.
        let expected: usize = self
            .stripes
            .iter()
            .map(|stripe| stripe.pending_count())
            .sum();

        let mut drained = Vec::with_capacity(expected);
        self.stripes
            .iter()
            .for_each(|stripe| drained.extend(stripe.drain()));

        drained.sort_by_key(|entry| entry.lsn);
        drained
    }

    /// Drain the pending entries of the stripe with index `stripe_id`.
    ///
    /// Returns an empty vector when no stripe has that index.
    pub fn drain_stripe(&self, stripe_id: usize) -> Vec<PendingEntry> {
        match self.stripes.get(stripe_id) {
            Some(stripe) => stripe.drain(),
            None => Vec::new(),
        }
    }

    /// Snapshot the metrics of every stripe, in stripe index order.
    pub fn stripe_metrics(&self) -> Vec<StripeMetrics> {
        let mut snapshot = Vec::with_capacity(self.stripes.len());
        for stripe in &self.stripes {
            snapshot.push(stripe.metrics());
        }
        snapshot
    }

    /// Close every stripe, in index order, so that no new entries are accepted.
    ///
    /// This neither sets the shutdown flag nor waits for in-flight appends;
    /// use `shutdown_graceful` for that.
    pub fn close(&self) {
        self.stripes.iter().for_each(|stripe| {
            stripe.close();
        });
    }

    /// Gracefully shutdown the WAL.
    ///
    /// This signals that shutdown is requested (so new append calls are
    /// rejected), waits for all in-flight append calls to finish, and then
    /// closes the ring buffers.
    ///
    /// The wait has no timeout: it returns only once `active_batches` drops
    /// to zero.
    pub fn shutdown_graceful(&self) {
        /// Number of busy-spin iterations before falling back to yielding.
        const SPIN_LIMIT: u32 = 100;

        // 1. Signal shutdown
        self.shutdown_requested.store(true, Ordering::SeqCst);

        // 2. Wait for active batches to complete.
        //
        // The counter only advances while spinning. Incrementing it on every
        // iteration of an unbounded wait would eventually overflow: a panic
        // in debug builds, and in release a wrap to a negative value that
        // silently switches back to busy-spinning.
        let mut spins: u32 = 0;
        while self.active_batches.load(Ordering::SeqCst) > 0 {
            if spins < SPIN_LIMIT {
                spins += 1;
                std::hint::spin_loop();
            } else {
                std::thread::yield_now();
            }
        }

        // 3. Close buffers
        self.close();
    }

    /// Report whether the WAL is closed.
    ///
    /// Only the first stripe is inspected. A WAL with no stripes at all is
    /// reported as closed.
    pub fn is_closed(&self) -> bool {
        match self.stripes.first() {
            Some(first) => first.is_closed(),
            None => true,
        }
    }

    /// Set the next LSN (for recovery).
    ///
    /// Forwards `lsn` to `LsnAllocator::set_next`; no validation against
    /// already-allocated LSNs happens here.
    pub fn set_next_lsn(&self, lsn: LSN) {
        self.lsn_allocator.set_next(lsn);
    }

    /// Borrow the WAL directory path from the stored configuration.
    pub fn wal_dir(&self) -> &Path {
        self.config.wal_dir.as_path()
    }

    /// Get the configuration exactly as it was passed to `new`.
    ///
    /// `num_stripes` here is the configured value; use `num_stripes()` for
    /// the power-of-two count that is actually in use.
    pub fn config(&self) -> &ConcurrentWalConfig {
        &self.config
    }
}

/// Aggregate metrics for the concurrent WAL.
///
/// A point-in-time snapshot produced by `ConcurrentWal::metrics`. The fields
/// are read one after another, so under concurrent appends they need not
/// describe exactly the same instant.
#[derive(Debug, Clone)]
pub struct ConcurrentWalMetrics {
    /// Total entries appended across all stripes.
    pub total_appends: u64,
    /// Current (next to be allocated) LSN.
    pub current_lsn: LSN,
    /// Per-stripe metrics, in stripe index order.
    pub stripes: Vec<StripeMetrics>,
    /// Total pending entries across all stripes (sum of each stripe's
    /// `pending_count`).
    pub total_pending: usize,
}

impl ConcurrentWal {
    /// Build an aggregate metrics snapshot.
    ///
    /// Collects the per-stripe metrics first, sums their `pending_count`
    /// values, and then reads the append counter and the current LSN.
    pub fn metrics(&self) -> ConcurrentWalMetrics {
        let per_stripe = self.stripe_metrics();

        let mut pending_sum: usize = 0;
        for stripe in &per_stripe {
            pending_sum += stripe.pending_count;
        }

        ConcurrentWalMetrics {
            total_appends: self.total_appends(),
            current_lsn: self.current_lsn(),
            total_pending: pending_sum,
            stripes: per_stripe,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::GLOBAL_INTERNER;
    use crate::core::id::NodeId;
    use crate::core::property::PropertyMap;
    use crate::core::temporal::time;
    use std::sync::Arc;
    use std::thread;
    use tempfile::tempdir;

    /// Minimal `CreateNode` operation (node 1, label "Test", no properties)
    /// used as a generic payload by the tests.
    fn test_operation() -> WalOperation {
        let node_id = NodeId::new(1).unwrap();
        let label = GLOBAL_INTERNER.intern("Test").unwrap();

        WalOperation::CreateNode {
            node_id,
            label,
            properties: PropertyMap::new(),
            valid_from: time::now(),
        }
    }

    // ============================================================
    // TDD Tests - Written FIRST to define expected behavior
    // ============================================================

    /// A fresh WAL has the default stripe count, starts at LSN 1 and has no appends.
    #[test]
    fn test_concurrent_wal_creation() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        assert_eq!(wal.total_appends(), 0);
        assert_eq!(wal.current_lsn(), LSN(1));
        assert_eq!(wal.num_stripes(), DEFAULT_NUM_STRIPES);
    }

    /// A power-of-two stripe count is used as configured.
    #[test]
    fn test_concurrent_wal_custom_stripes() {
        let tmp = tempdir().unwrap();
        let eight_stripes = ConcurrentWalConfig::new(tmp.path()).with_num_stripes(8);

        let wal = ConcurrentWal::new(eight_stripes).unwrap();

        assert_eq!(wal.num_stripes(), 8);
    }

    /// A stripe count that is not a power of two is rounded up (10 -> 16).
    #[test]
    fn test_concurrent_wal_stripe_rounding() {
        let tmp = tempdir().unwrap();
        let requested_ten = ConcurrentWalConfig::new(tmp.path()).with_num_stripes(10);

        let wal = ConcurrentWal::new(requested_ten).unwrap();

        assert_eq!(wal.num_stripes(), 16);
    }

    /// Sequential async appends receive LSNs 1, 2, 3 and are all counted.
    #[test]
    fn test_append_async_allocates_lsn() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        let lsns: Vec<LSN> = (0..3)
            .map(|_| wal.append_async(test_operation()).unwrap())
            .collect();

        assert_eq!(lsns, vec![LSN(1), LSN(2), LSN(3)]);
        assert_eq!(wal.total_appends(), 3);
    }

    /// `append_with_handle` returns the first LSN and a handle that is not
    /// complete before any drain has happened.
    #[test]
    fn test_append_with_handle() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        let (lsn, handle) = wal.append_with_handle(test_operation()).unwrap();

        assert!(!handle.is_complete());
        assert_eq!(lsn, LSN(1));
    }

    /// Entries appended from several threads come back from `drain_all` in
    /// strictly increasing LSN order.
    #[test]
    fn test_drain_all_sorted_by_lsn() {
        let tmp = tempdir().unwrap();
        let wal = Arc::new(
            ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path()).with_num_stripes(4)).unwrap(),
        );

        // Four writer threads, ten appends each.
        let mut workers = Vec::new();
        for _ in 0..4 {
            let wal = Arc::clone(&wal);
            workers.push(thread::spawn(move || {
                for _ in 0..10 {
                    wal.append_async(test_operation()).unwrap();
                }
            }));
        }
        for worker in workers {
            worker.join().unwrap();
        }

        let drained = wal.drain_all();
        assert_eq!(drained.len(), 40);

        // Every neighbouring pair must be strictly ascending.
        assert!(drained.windows(2).all(|pair| pair[0].lsn < pair[1].lsn));
    }

    /// Repeated stripe lookups from one thread always yield the same stripe.
    #[test]
    fn test_stripe_affinity() {
        let tmp = tempdir().unwrap();
        let wal =
            ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path()).with_num_stripes(16)).unwrap();

        let first = wal.get_stripe().id();
        for _ in 0..2 {
            assert_eq!(wal.get_stripe().id(), first);
        }
    }

    /// Eight threads appending 100 entries each produce 800 counted appends
    /// and advance the LSN to 801.
    #[test]
    fn test_concurrent_appends() {
        const THREADS: usize = 8;
        const APPENDS_PER_THREAD: usize = 100;

        let tmp = tempdir().unwrap();
        let wal = Arc::new(
            ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path()).with_num_stripes(8)).unwrap(),
        );

        let mut workers = Vec::with_capacity(THREADS);
        for _ in 0..THREADS {
            let wal = Arc::clone(&wal);
            workers.push(thread::spawn(move || {
                for _ in 0..APPENDS_PER_THREAD {
                    wal.append_async(test_operation()).unwrap();
                }
            }));
        }
        for worker in workers {
            worker.join().unwrap();
        }

        let expected_total = (THREADS * APPENDS_PER_THREAD) as u64;
        assert_eq!(wal.total_appends(), expected_total);
        assert_eq!(wal.current_lsn(), LSN(expected_total + 1));
    }

    /// After `close`, the WAL reports closed and rejects async appends.
    #[test]
    fn test_close_prevents_appends() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        wal.close();

        assert!(wal.is_closed());
        assert!(wal.append_async(test_operation()).is_err());
    }

    /// Aggregate metrics reflect ten undrained appends on a four-stripe WAL.
    #[test]
    fn test_metrics() {
        let tmp = tempdir().unwrap();
        let wal =
            ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path()).with_num_stripes(4)).unwrap();

        (0..10).for_each(|_| {
            wal.append_async(test_operation()).unwrap();
        });

        let snapshot = wal.metrics();
        assert_eq!(snapshot.total_appends, 10);
        assert_eq!(snapshot.current_lsn, LSN(11));
        assert_eq!(snapshot.stripes.len(), 4);
        assert_eq!(snapshot.total_pending, 10);
    }

    /// `set_next_lsn` moves the allocator: the value is visible through
    /// `current_lsn` and is handed to the next append.
    #[test]
    fn test_set_next_lsn() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();
        let restart_point = LSN(1000);

        wal.set_next_lsn(restart_point);
        assert_eq!(wal.current_lsn(), LSN(1000));

        let appended = wal.append_async(test_operation()).unwrap();
        assert_eq!(appended, LSN(1000));
    }

    /// A single append shows up in exactly one of the four stripes.
    #[test]
    fn test_drain_stripe() {
        let tmp = tempdir().unwrap();
        let wal =
            ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path()).with_num_stripes(4)).unwrap();

        wal.append_async(test_operation()).unwrap();

        // Drain each stripe in turn and add up what was found.
        let mut drained_total = 0usize;
        for stripe_id in 0..4 {
            drained_total += wal.drain_stripe(stripe_id).len();
        }
        assert_eq!(drained_total, 1);
    }

    /// Notifying completion on drained entries completes the handle that was
    /// returned at append time.
    #[test]
    fn test_completion_notification_via_drain() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        let (_lsn, handle) = wal.append_with_handle(test_operation()).unwrap();
        assert!(!handle.is_complete());

        // Keep the drained entries alive until after the final assertion.
        let drained = wal.drain_all();
        drained.iter().for_each(|entry| {
            entry.notify_completion();
        });

        assert!(handle.is_complete());
    }

    // ============================================================
    // Batch Append Tests (Issue #219)
    // ============================================================

    /// A three-element batch on a fresh WAL receives LSNs 1, 2 and 3.
    #[test]
    fn test_append_batch_allocates_consecutive_lsns() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        let batch: Vec<WalOperation> = (0..3).map(|_| test_operation()).collect();
        let lsns = wal.append_batch(batch).unwrap();

        assert_eq!(lsns.len(), 3);
        assert_eq!(lsns, vec![LSN(1), LSN(2), LSN(3)]);
        assert_eq!(wal.total_appends(), 3);
    }

    /// An empty batch succeeds, returns no LSNs and counts no appends.
    #[test]
    fn test_append_batch_empty_operations() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        let lsns = wal.append_batch(Vec::<WalOperation>::new()).unwrap();

        assert_eq!(lsns.len(), 0);
        assert_eq!(wal.total_appends(), 0);
    }

    /// A one-element batch behaves like a single append: LSN 1, one append counted.
    #[test]
    fn test_append_batch_single_operation() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        let lsns = wal.append_batch(vec![test_operation()]).unwrap();

        assert_eq!(lsns.len(), 1);
        assert_eq!(lsns[0], LSN(1));
        assert_eq!(wal.total_appends(), 1);
    }

    /// A 100-element batch receives LSNs 1 through 100 and counts 100 appends.
    #[test]
    fn test_append_batch_many_operations() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        // 100 distinct CreateNode operations sharing one timestamp.
        let shared_timestamp = time::now();
        let mut batch: Vec<WalOperation> = Vec::with_capacity(100);
        for i in 0..100 {
            batch.push(WalOperation::CreateNode {
                node_id: NodeId::new(i + 1).unwrap(),
                label: GLOBAL_INTERNER.intern(format!("Node{}", i)).unwrap(),
                properties: PropertyMap::new(),
                valid_from: shared_timestamp,
            });
        }

        let lsns = wal.append_batch(batch).unwrap();

        assert_eq!(lsns.len(), 100);
        assert_eq!(lsns.first().copied(), Some(LSN(1)));
        assert_eq!(lsns.last().copied(), Some(LSN(100)));
        assert_eq!(wal.total_appends(), 100);
    }

    /// Entries drained after a batch append carry the LSNs that the batch
    /// call returned, in the same order.
    #[test]
    fn test_append_batch_with_drain() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        let assigned = wal
            .append_batch(vec![test_operation(), test_operation()])
            .unwrap();

        let drained = wal.drain_all();
        assert_eq!(drained.len(), 2);

        // Compare position by position; indexing into `assigned` still panics
        // if fewer LSNs were returned than entries drained.
        for (idx, entry) in drained.iter().enumerate() {
            assert_eq!(entry.lsn, assigned[idx]);
        }
    }

    /// Mixing single appends with a batch append on one WAL produces a single
    /// contiguous LSN sequence in call order.
    #[test]
    fn test_append_batch_interleaved_with_single() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(ConcurrentWalConfig::new(tmp.path())).unwrap();

        // Call order: single, batch of two, single.
        let before_batch = wal.append_async(test_operation()).unwrap();
        let in_batch = wal
            .append_batch(vec![test_operation(), test_operation()])
            .unwrap();
        let after_batch = wal.append_async(test_operation()).unwrap();

        // The four appends are expected to be numbered 1 through 4.
        assert_eq!(before_batch, LSN(1));
        assert_eq!(in_batch[0], LSN(2));
        assert_eq!(in_batch[1], LSN(3));
        assert_eq!(after_batch, LSN(4));

        assert_eq!(wal.total_appends(), 4);
    }

    /// Exercises the read-only accessors: WAL directory, configuration and
    /// stripe lookup by index.
    #[test]
    fn test_concurrent_wal_accessors() {
        let tmp = tempdir().unwrap();
        let cfg = ConcurrentWalConfig::new(tmp.path());
        // The WAL gets a clone so `cfg` stays available for comparison.
        let wal = ConcurrentWal::new(cfg.clone()).unwrap();

        // Directory and stripe count must match what was configured.
        assert_eq!(wal.wal_dir(), tmp.path());
        assert_eq!(wal.config().num_stripes, cfg.num_stripes);

        // Index 0 resolves to a stripe; index 1000 is asserted to be absent.
        let in_range = wal.stripe(0);
        assert!(in_range.is_some());
        let out_of_range = wal.stripe(1000);
        assert!(out_of_range.is_none());
    }
}

#[cfg(test)]
mod sentry_tests {
    use super::*;
    use crate::GLOBAL_INTERNER;
    use crate::core::id::NodeId;
    use crate::core::property::PropertyMapBuilder;
    use crate::core::temporal::time;
    use crate::storage::wal::entry::MAX_WAL_ENTRY_SIZE;
    use tempfile::tempdir;

    /// Bytes of a `CreateNode` entry that are not the property value itself,
    /// for an entry carrying exactly one string property keyed `"k"`.
    ///
    /// Accounting used by the boundary tests below:
    /// - fixed header: 24 bytes (LSN + time + checksum)
    /// - operation: 1 (op) + 8 (node_id) + 4 (label) + 12 (time) = 25 bytes
    /// - property map: 4 (count) + 4 (key_len) + 1 (key `"k"`) + 1 (string tag)
    ///   + 4 (val_len) = 14 bytes
    ///
    /// Each test re-checks this figure against `estimate_entry_capacity`, so a
    /// stale value fails loudly instead of silently weakening the test.
    const SINGLE_PROPERTY_OVERHEAD: usize = 63;

    /// Builds a `CreateNode` operation whose only property, `"k"`, holds a
    /// string of `value_len` repetitions of `x`.
    fn create_node_with_value_len(value_len: usize) -> WalOperation {
        let properties = PropertyMapBuilder::new()
            .insert("k", "x".repeat(value_len))
            .build();

        WalOperation::CreateNode {
            node_id: NodeId::new(1).unwrap(),
            label: GLOBAL_INTERNER.intern("Test").unwrap(),
            properties,
            valid_from: time::now(),
        }
    }

    /// 🎯 Target: the MAX_WAL_ENTRY_SIZE limit, from the accepted side.
    /// 💣 Risk: an off-by-one (e.g. `>` turned into `>=`) would reject a legal max-size entry.
    /// 🧪 Strategy: build an entry whose estimated size equals the limit exactly.
    /// 🔬 Verification: the append returns `Ok`.
    #[test]
    fn test_append_entry_exactly_max_size_succeeds() {
        let tmp = tempdir().unwrap();
        // Segment size is raised to twice the entry limit to accommodate the large entry.
        let wal = ConcurrentWal::new(
            ConcurrentWalConfig::new(tmp.path()).with_segment_size(MAX_WAL_ENTRY_SIZE * 2),
        )
        .unwrap();

        // Value length chosen so that overhead + value == MAX_WAL_ENTRY_SIZE.
        let op = create_node_with_value_len(MAX_WAL_ENTRY_SIZE - SINGLE_PROPERTY_OVERHEAD);

        // Confirm the size arithmetic before trusting the append result.
        let estimated = crate::storage::wal::estimate_entry_capacity(&op);
        assert_eq!(
            estimated, MAX_WAL_ENTRY_SIZE,
            "Entry size calculation incorrect"
        );

        // An entry sitting exactly on the limit must be accepted.
        let outcome = wal.append_async(op);
        assert!(
            outcome.is_ok(),
            "Failed to append entry of exactly MAX_WAL_ENTRY_SIZE: {:?}",
            outcome.err()
        );
    }

    /// 🎯 Target: the MAX_WAL_ENTRY_SIZE limit, from the rejected side.
    /// 💣 Risk: a missing or loosened check would let oversized entries through (DoS).
    /// 🧪 Strategy: build an entry whose estimated size is one byte past the limit.
    /// 🔬 Verification: the append fails with `CapacityExceeded` reporting both sizes.
    #[test]
    fn test_append_entry_exceeding_max_size_fails() {
        let tmp = tempdir().unwrap();
        let wal = ConcurrentWal::new(
            ConcurrentWalConfig::new(tmp.path()).with_segment_size(MAX_WAL_ENTRY_SIZE * 2),
        )
        .unwrap();

        // One value byte more than the exact-fit case.
        let op = create_node_with_value_len(MAX_WAL_ENTRY_SIZE - SINGLE_PROPERTY_OVERHEAD + 1);

        // Confirm the entry really is one byte over.
        let estimated = crate::storage::wal::estimate_entry_capacity(&op);
        assert_eq!(
            estimated,
            MAX_WAL_ENTRY_SIZE + 1,
            "Entry size calculation incorrect"
        );

        let outcome = wal.append_async(op);
        assert!(outcome.is_err(), "Should have rejected oversized entry");

        // The error must be the capacity variant and carry the exact numbers.
        if let Err(Error::Storage(StorageError::CapacityExceeded { current, limit, .. })) =
            &outcome
        {
            assert_eq!(*current, MAX_WAL_ENTRY_SIZE + 1);
            assert_eq!(*limit, MAX_WAL_ENTRY_SIZE);
        } else {
            panic!("Expected CapacityExceeded error, got {:?}", outcome);
        }
    }

    /// 🎯 Target: thread-local caching of the stripe a writer is assigned to.
    /// 💣 Risk: an index cached while using a 32-stripe WAL may be out of bounds
    ///          when the same thread later appends to a 4-stripe WAL.
    /// 🧪 Strategy: on each of several threads, append to a large WAL, then a small one.
    /// 🔬 Verification: every thread finishes without panicking.
    #[test]
    fn test_thread_local_switching_between_sizes() {
        use std::thread;

        /// Scenario run on each worker thread: one append against a
        /// 32-stripe WAL followed by one append against a 4-stripe WAL.
        fn large_then_small() {
            // Step 1: the large WAL (32 stripes).
            let dir_large = tempdir().unwrap();
            let wal_large =
                ConcurrentWal::new(ConcurrentWalConfig::new(dir_large.path()).with_num_stripes(32))
                    .unwrap();

            let op = WalOperation::CreateNode {
                node_id: NodeId::new(1).unwrap(),
                label: GLOBAL_INTERNER.intern("Test").unwrap(),
                properties: PropertyMapBuilder::new().build(),
                valid_from: time::now(),
            };

            // Expected to seed this thread's cached stripe index with a value in [0, 31].
            wal_large.append_async(op.clone()).unwrap();

            // Step 2: the small WAL (4 stripes).
            let dir_small = tempdir().unwrap();
            let wal_small =
                ConcurrentWal::new(ConcurrentWalConfig::new(dir_small.path()).with_num_stripes(4))
                    .unwrap();

            // If a cached index above 3 were reused unchecked, this append would
            // panic; the implementation has to re-validate or re-derive it.
            wal_small.append_async(op).unwrap();
        }

        // Several threads make it likely that at least one lands on an index
        // above 3 in the large WAL.
        let mut workers = Vec::with_capacity(10);
        for _ in 0..10 {
            workers.push(thread::spawn(large_then_small));
        }

        // A panic inside any worker surfaces here through `join`.
        for worker in workers {
            worker.join().unwrap();
        }
    }
}