gil 0.8.0 - Docs.rs

//! Sharded multi-producer multi-consumer channel.
//!
//! The channel is composed of multiple shards, each being a single-producer single-consumer (SPSC) queue.
//! This structure allows for high throughput and reduced contention when multiple threads are
//! sending or receiving concurrently.
//!
//! # Examples
//!
//! ```
//! use std::thread;
//! use core::num::NonZeroUsize;
//! use gil::mpmc::sharded::channel;
//!
//! let (mut tx, mut rx) = channel::<usize>(
//!     NonZeroUsize::new(4).unwrap(),   // 4 shards
//!     NonZeroUsize::new(256).unwrap(), // 256 capacity per shard
//! );
//!
//! let mut tx2 = tx.try_clone().expect("shard available");
//! let h = thread::spawn(move || tx2.send(1));
//!
//! tx.send(2);
//!
//! let mut values = [rx.recv(), rx.recv()];
//! values.sort();
//! assert_eq!(values, [1, 2]);
//! h.join().unwrap();
//! ```
//!
//! # Performance Implications
//!
//! **Sharded vs. Normal MPMC:**
//!
//! *   **Reduced Contention:** In a standard MPMC queue, all threads contend on the same atomic head and tail
//!     indices. In this sharded implementation, producers and consumers are distributed across multiple
//!     independent SPSC queues (shards). This dramatically reduces CAS (Compare-And-Swap) failures and
//!     cache thrashing under high contention.
//! *   **Throughput:** Throughput typically scales linearly with the number of shards (up to the number of physical cores),
//!     whereas standard MPMC queues often hit a scalability wall.
//! *   **Trade-offs:**
//!     *   **Memory:** Higher memory footprint due to pre-allocating multiple fixed-size buffers.
//!     *   **Bounded Concurrency:** The number of active senders and receivers is bounded by the number of shards.
//!         Operations like `clone()` will fail if all shards are occupied.
//!     *   **Fairness:** Strict global FIFO ordering is not guaranteed; ordering is preserved only within each shard.
//!
//! # Gotchas
//!
//! - **Fallible Clone:** `Sender::try_clone()` and `Receiver::try_clone()` return `Option`. They return
//!   `None` if all shards are already occupied. Always handle this case in production code.
//! - **Power of Two:** `max_shards` must be a power of two.
//! - **ReadGuard Locking:** `Receiver::read_guard()` returns a [`ReadGuard`](crate::read_guard::ReadGuard)
//!   that holds an internal shard lock. The lock is released when the guard is dropped. Holding the
//!   guard for too long will block other receivers from accessing that shard.
//! - **Batch Operations:** This variant supports batch operations via the
//!   [`BatchReader`](crate::read_guard::BatchReader) trait.

use core::num::NonZeroUsize;

mod receiver;
mod sender;
use crate::spsc::shards::ShardsPtr;

pub use receiver::Receiver;
pub use sender::Sender;

/// Creates a new sharded multi-producer multi-consumer channel.
///
/// See the [module-level documentation](self) for more details on performance implications.
///
/// # Arguments
///
/// * `max_shards` - The maximum number of shards to create. This must be a power of two.
/// * `capacity_per_shard` - The capacity of each individual shard.
///
/// # Returns
///
/// A tuple containing a [`Sender`] and a [`Receiver`].
///
/// # Examples
///
/// ```
/// use core::num::NonZeroUsize;
/// use gil::mpmc::sharded::channel;
///
/// let (tx, rx) = channel::<usize>(
///     NonZeroUsize::new(4).unwrap(),
///     NonZeroUsize::new(256).unwrap(),
/// );
/// ```
pub fn channel<T>(
    max_shards: NonZeroUsize,
    capacity_per_shard: NonZeroUsize,
) -> (sender::Sender<T>, receiver::Receiver<T>) {
    debug_assert_ne!(max_shards.get(), 0, "number of shards must be > 0");
    debug_assert!(
        max_shards.is_power_of_two(),
        "number of shards must be a power of 2"
    );

    let shards = ShardsPtr::new(max_shards, capacity_per_shard);

    (
        sender::Sender::new(shards.clone(), max_shards),
        receiver::Receiver::new(shards, max_shards.get()),
    )
}

#[cfg(all(test, not(feature = "loom")))]
mod test {
    use super::*;

    use crate::thread;
    use alloc_crate::vec;

    #[test]
    fn basic() {
        const THREADS: u32 = 8;
        const ITER: u32 = 10;

        let (mut tx, mut rx) = channel(
            NonZeroUsize::new(THREADS as usize).unwrap(),
            NonZeroUsize::new(4).unwrap(),
        );

        thread::scope(move |scope| {
            for thread_id in 0..THREADS - 1 {
                let mut tx = tx.try_clone().unwrap();
                scope.spawn(move || {
                    for i in 0..ITER {
                        tx.send((thread_id, i));
                    }
                });
            }
            scope.spawn(move || {
                for i in 0..ITER {
                    tx.send((THREADS - 1, i));
                }
            });

            let mut sum = 0;
            for _ in 0..THREADS {
                for _ in 0..ITER {
                    let (_thread_id, i) = rx.recv();
                    sum += i;
                }
            }

            assert_eq!(sum, (ITER * (ITER - 1)) / 2 * THREADS);
        });
    }

    #[test]
    fn multiple_senders_multiple_receivers_blocking() {
        use std::sync::Arc;
        use std::sync::atomic::{AtomicUsize, Ordering};
        const SENDERS: usize = 4;
        const RECEIVERS: usize = 4;
        const MESSAGES: usize = 1000;

        let (tx, rx) = channel(
            NonZeroUsize::new(SENDERS).unwrap(),
            NonZeroUsize::new(128).unwrap(),
        );
        let total_received = Arc::new(AtomicUsize::new(0));
        let total_sum = Arc::new(AtomicUsize::new(0));

        thread::scope(|s| {
            for t in 0..SENDERS - 1 {
                let mut tx = tx.try_clone().unwrap();
                s.spawn(move || {
                    for i in 0..MESSAGES {
                        tx.send(t * MESSAGES + i);
                    }
                });
            }
            let mut tx = tx;
            s.spawn(move || {
                for i in 0..MESSAGES {
                    tx.send((SENDERS - 1) * MESSAGES + i);
                }
            });

            for _ in 0..RECEIVERS - 1 {
                let mut rx = rx.try_clone().unwrap();
                let total_received = total_received.clone();
                let total_sum = total_sum.clone();
                s.spawn(move || {
                    for _ in 0..(SENDERS * MESSAGES / RECEIVERS) {
                        let val = rx.recv();
                        total_received.fetch_add(1, Ordering::SeqCst);
                        total_sum.fetch_add(val, Ordering::SeqCst);
                    }
                });
            }
            let mut rx = rx;
            let total_received = total_received.clone();
            let total_sum = total_sum.clone();
            s.spawn(move || {
                for _ in 0..(SENDERS * MESSAGES / RECEIVERS) {
                    let val = rx.recv();
                    total_received.fetch_add(1, Ordering::SeqCst);
                    total_sum.fetch_add(val, Ordering::SeqCst);
                }
            });
        });

        assert_eq!(total_received.load(Ordering::SeqCst), SENDERS * MESSAGES);
        let n = SENDERS * MESSAGES;
        let expected_sum = n * (n - 1) / 2;
        assert_eq!(total_sum.load(Ordering::SeqCst), expected_sum);
    }

    #[test]
    fn multiple_senders_multiple_receivers_try() {
        use std::sync::Arc;
        use std::sync::atomic::{AtomicUsize, Ordering};
        const SENDERS: usize = 4;
        const RECEIVERS: usize = 4;
        const MESSAGES: usize = 1000;

        let (tx, rx) = channel(
            NonZeroUsize::new(SENDERS).unwrap(),
            NonZeroUsize::new(128).unwrap(),
        );
        let total_received = Arc::new(AtomicUsize::new(0));
        let total_sum = Arc::new(AtomicUsize::new(0));

        thread::scope(|s| {
            for t in 0..SENDERS - 1 {
                let mut tx = tx.try_clone().unwrap();
                s.spawn(move || {
                    for i in 0..MESSAGES {
                        let mut backoff = crate::Backoff::with_spin_count(1);
                        while tx.try_send(t * MESSAGES + i).is_err() {
                            backoff.backoff();
                        }
                    }
                });
            }
            let mut tx = tx;
            s.spawn(move || {
                let t = SENDERS - 1;
                for i in 0..MESSAGES {
                    let mut backoff = crate::Backoff::with_spin_count(1);
                    while tx.try_send(t * MESSAGES + i).is_err() {
                        backoff.backoff();
                    }
                }
            });

            for _ in 0..RECEIVERS - 1 {
                let mut rx = rx.try_clone().unwrap();
                let total_received = total_received.clone();
                let total_sum = total_sum.clone();
                s.spawn(move || {
                    let mut count = 0;
                    let mut backoff = crate::Backoff::with_spin_count(1);
                    while count < (SENDERS * MESSAGES / RECEIVERS) {
                        if let Some(val) = rx.try_recv() {
                            total_received.fetch_add(1, Ordering::SeqCst);
                            total_sum.fetch_add(val, Ordering::SeqCst);
                            count += 1;
                        } else {
                            backoff.backoff();
                        }
                    }
                });
            }
            let mut rx = rx;
            let total_received = total_received.clone();
            let total_sum = total_sum.clone();
            s.spawn(move || {
                let mut count = 0;
                let mut backoff = crate::Backoff::with_spin_count(1);
                while count < (SENDERS * MESSAGES / RECEIVERS) {
                    if let Some(val) = rx.try_recv() {
                        total_received.fetch_add(1, Ordering::SeqCst);
                        total_sum.fetch_add(val, Ordering::SeqCst);
                        count += 1;
                    } else {
                        backoff.backoff();
                    }
                }
            });
        });

        assert_eq!(total_received.load(Ordering::SeqCst), SENDERS * MESSAGES);
        let n = SENDERS * MESSAGES;
        let expected_sum = n * (n - 1) / 2;
        assert_eq!(total_sum.load(Ordering::SeqCst), expected_sum);
    }

    #[test]
    fn test_valid_try_sends() {
        let (mut tx, mut rx) =
            channel::<usize>(NonZeroUsize::new(1).unwrap(), NonZeroUsize::new(4).unwrap());
        for _ in 0..4 {
            assert!(rx.try_recv().is_none());
        }
        for i in 0..4 {
            tx.try_send(i).unwrap();
        }
        assert!(tx.try_send(5).is_err());

        for i in 0..4 {
            assert_eq!(rx.try_recv(), Some(i));
        }
        assert!(rx.try_recv().is_none());
        for i in 0..4 {
            tx.try_send(i).unwrap();
        }
    }

    #[test]
    fn test_drop_full_capacity() {
        use std::sync::Arc;
        use std::sync::atomic::{AtomicUsize, Ordering};

        struct DropCounter(Arc<AtomicUsize>);

        impl Drop for DropCounter {
            fn drop(&mut self) {
                self.0.fetch_add(1, Ordering::SeqCst);
            }
        }

        let dropped_count = Arc::new(AtomicUsize::new(0));

        {
            let (mut tx, _rx) = channel::<DropCounter>(
                NonZeroUsize::new(1).unwrap(),
                NonZeroUsize::new(4).unwrap(),
            );

            for _ in 0..4 {
                tx.send(DropCounter(dropped_count.clone()));
            }
        }

        let count = dropped_count.load(Ordering::SeqCst);
        assert_eq!(
            count, 4,
            "Expected 4 items to be dropped, but got {}",
            count
        );
    }

    #[test]
    fn test_batched_sharded_send_recv() {
        const SHARDS: usize = 4;
        const CAPACITY_PER_SHARD: usize = 256;
        const TOTAL_ITEMS_PER_THREAD: usize = 1024;

        let (mut tx, mut rx) = channel(
            NonZeroUsize::new(SHARDS).unwrap(),
            NonZeroUsize::new(CAPACITY_PER_SHARD).unwrap(),
        );

        thread::scope(|scope| {
            for thread_id in 0..SHARDS - 1 {
                let mut tx = tx.try_clone().unwrap();
                scope.spawn(move || {
                    let mut sent = 0;
                    while sent < TOTAL_ITEMS_PER_THREAD {
                        let buffer = tx.write_buffer();
                        let batch_size = buffer.len().min(TOTAL_ITEMS_PER_THREAD - sent);
                        for (i, slot) in buffer.iter_mut().enumerate().take(batch_size) {
                            slot.write(thread_id * 10000 + sent + i);
                        }
                        unsafe { tx.commit(batch_size) };
                        sent += batch_size;
                    }
                });
            }

            scope.spawn(move || {
                let thread_id = SHARDS - 1;
                let mut sent = 0;
                while sent < TOTAL_ITEMS_PER_THREAD {
                    let buffer = tx.write_buffer();
                    let batch_size = buffer.len().min(TOTAL_ITEMS_PER_THREAD - sent);
                    for (i, slot) in buffer.iter_mut().enumerate().take(batch_size) {
                        slot.write(thread_id * 10000 + sent + i);
                    }
                    unsafe { tx.commit(batch_size) };
                    sent += batch_size;
                }
            });

            let mut received_counts = vec![0; SHARDS];
            let mut total_received = 0;
            let total_expected = SHARDS * TOTAL_ITEMS_PER_THREAD;

            while total_received < total_expected {
                let mut guard = rx.read_guard();
                if guard.is_empty() {
                    continue;
                }

                for &value in guard.as_slice() {
                    let thread_id = value / 10000;
                    let sent_id = value % 10000;
                    assert_eq!(sent_id, received_counts[thread_id]);
                    received_counts[thread_id] += 1;
                    total_received += 1;
                }

                let count = guard.len();
                guard.advance(count);
            }

            assert_eq!(total_received, total_expected);
            for count in received_counts {
                assert_eq!(count, TOTAL_ITEMS_PER_THREAD);
            }
        });
    }
}