// rustzmq2 0.1.0 - Docs.rs

//! Pipelined-throughput benches.
//!
//! The round-trip latency benches (`pub_sub.rs`, `req_rep.rs`) measure
//! per-iteration latency: publish one → wait for every sub → repeat.
//! That pattern is serialized end-to-end, so it hides allocation and
//! synchronization overhead under load — particularly PUB's per-subscriber
//! message clone and fair-queue lock contention.
//!
//! These benches pump a large batch of messages and measure total time,
//! exposing that overhead. Also covers DEALER↔ROUTER pipelined async RPC,
//! the libzmq canonical high-throughput pattern. Both `rustzmq2` and
//! `libzmq` (via zmq2 bindings) run the same shapes so
//! `zmqrs/throughput/...` and `libzmq/throughput/...` criterion groups
//! line up for apples-to-apples comparison.
//!
//! Transports: `tcp`, `ipc`, and `inproc` (the libzmq PUB fan-out bench runs
//! `tcp` and `ipc` only). Sockets raise `send_hwm`/`receive_hwm` to
//! `BATCH_SIZE * 4` (`BATCH_SIZE * 16` for the libzmq PUB/SUB sockets) so a
//! full batch fits in flight, limiting drops (PUB fire-and-forget) and
//! avoiding back-pressure deadlocks (DEALER↔ROUTER).

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{mpsc, Arc};
use std::thread;
use std::time::Duration;
use tokio::runtime::{Builder, Runtime};

use rustzmq2::{prelude::*, DealerSocket, PubSocket, RouterSocket, SubSocket, ZmqMessage};

/// Builds the multi-threaded tokio runtime used by the rustzmq2 benches.
///
/// The worker count is pinned to two threads so that results do not vary
/// with the number of CPUs on the host. See `compare_libzmq.rs::build_rt`
/// for rationale.
///
/// # Panics
///
/// Panics if the runtime cannot be constructed.
fn build_rt() -> Runtime {
    const WORKER_THREADS: usize = 2;

    let mut builder = Builder::new_multi_thread();
    builder.worker_threads(WORKER_THREADS).enable_all();
    builder.build().expect("tokio runtime")
}

/// Number of messages sent per measured iteration; also the base value the
/// high-water marks are derived from.
const BATCH_SIZE: usize = 1024;
/// Payload sizes, in bytes, each bench shape is run with.
const PIPELINE_SIZES: &[usize] = &[256, 4096];
/// Subscriber counts exercised by the PUB fan-out benches.
const SUB_COUNTS: &[usize] = &[1, 8, 64];

/// Process-wide counter handing out a distinct sequence number per endpoint.
static IPC_SEQ: AtomicU64 = AtomicU64::new(0);

/// Returns a fresh `ipc://` endpoint under `/tmp`.
///
/// The path embeds `tag`, the current process id and the next `IPC_SEQ`
/// value, so successive calls never return the same path within a process.
fn ipc_path(tag: &str) -> String {
    let seq = IPC_SEQ.fetch_add(1, Ordering::Relaxed);
    format!(
        "ipc:///tmp/zmq-tput-{}-{}-{}.sock",
        tag,
        std::process::id(),
        seq
    )
}

// ── rustzmq2: pub fanout ─────────────────────────────────────────────────────

/// Registers the rustzmq2 PUB→SUB fan-out throughput benches.
///
/// One criterion group is created per (transport, subscriber count) pair,
/// named `zmqrs/throughput/pub_fanout/<transport>/subs=<n>`, containing one
/// benchmark per entry of `PIPELINE_SIZES`. The reported throughput is the
/// payload volume of a full batch multiplied by the subscriber count.
fn bench_zmqrs_pub_pipelined(c: &mut Criterion) {
    let rt = build_rt();

    for transport in ["tcp", "ipc", "inproc"] {
        for n_subs in SUB_COUNTS.iter().copied() {
            let group_name = format!(
                "zmqrs/throughput/pub_fanout/{}/subs={}",
                transport, n_subs
            );
            let mut group = c.benchmark_group(group_name);
            group
                .sample_size(10)
                .measurement_time(Duration::from_secs(10))
                .warm_up_time(Duration::from_secs(2));

            for msg_size in PIPELINE_SIZES.iter().copied() {
                let bytes_per_sub = (BATCH_SIZE as u64) * (msg_size as u64);
                let bytes_per_iter = bytes_per_sub * (n_subs as u64);
                group.throughput(Throughput::Bytes(bytes_per_iter));

                let id = BenchmarkId::from_parameter(msg_size);
                group.bench_with_input(id, &msg_size, |bencher, size| {
                    bench_zmqrs_pub_pipelined_one(bencher, &rt, n_subs, *size, transport);
                });
            }
            group.finish();
        }
    }
}

fn bench_zmqrs_pub_pipelined_one(
    b: &mut criterion::Bencher<'_>,
    rt: &Runtime,
    n_subs: usize,
    msg_size: usize,
    transport: &str,
) {
    let endpoint = match transport {
        "tcp" => "tcp://127.0.0.1:0".to_string(),
        "ipc" => ipc_path(&format!("zmqrs-pub-{}-{}", n_subs, msg_size)),
        "inproc" => format!(
            "inproc://bench-tput-zmqrs-pub-{}-{}-{}",
            std::process::id(),
            n_subs,
            msg_size
        ),
        _ => unreachable!(),
    };
    let (mut pub_sock, mut subs) = rt.block_on(async {
        // Raise HWM above BATCH_SIZE so PUB doesn't drop (fire-and-forget)
        // and SUB has room to buffer bursts. Needed for inproc in particular
        // where the inbound channel is sized by receive_hwm.
        let mut p = PubSocket::builder().send_hwm(BATCH_SIZE * 4).build();
        let bound = p.bind(&endpoint).await.expect("bind").to_string();
        let mut subs: Vec<SubSocket> = Vec::with_capacity(n_subs);
        for _ in 0..n_subs {
            let mut s = SubSocket::builder().receive_hwm(BATCH_SIZE * 4).build();
            s.connect(bound.as_str()).await.expect("sub connect");
            s.subscribe("").await.expect("subscribe");
            subs.push(s);
        }
        // Deterministic handshake: publish until every SUB has received one
        // matching message. The prior time-based settle (100 ms) was racy on
        // contended CI runners — if subscribe hadn't round-tripped to PUB by
        // the first measured send, PUB would drop the first burst and the
        // sub tasks would block forever waiting for `recv().await`.
        let handshake_byte = ZmqMessage::from(vec![0xFFu8; 1]);
        let deadline = std::time::Instant::now() + Duration::from_secs(10);
        let mut remaining_subs: Vec<SubSocket> = subs;
        let mut ready: Vec<SubSocket> = Vec::with_capacity(n_subs);
        while !remaining_subs.is_empty() {
            if std::time::Instant::now() > deadline {
                panic!("zmqrs pub→sub subscription handshake timed out after 10s");
            }
            p.send(handshake_byte.clone()).await.expect("pub handshake");
            // Non-blocking drain: whoever is ready gets moved over.
            let mut still_waiting = Vec::with_capacity(remaining_subs.len());
            for mut s in remaining_subs.drain(..) {
                match tokio::time::timeout(Duration::from_millis(5), s.recv()).await {
                    Ok(Ok(_)) => ready.push(s),
                    Ok(Err(_)) => panic!("sub recv errored during handshake"),
                    Err(_) => still_waiting.push(s),
                }
            }
            remaining_subs = still_waiting;
        }
        // Drain any extra handshake bytes left in each SUB's buffer. We've
        // stopped publishing 1-byte handshake messages, so a short recv
        // timeout cleanly catches whatever is still in flight without
        // consuming real payloads.
        for s in &mut ready {
            loop {
                match tokio::time::timeout(Duration::from_millis(50), s.recv()).await {
                    Ok(Ok(_)) => {}
                    Ok(Err(e)) => panic!("sub drain error: {:?}", e),
                    Err(_) => break,
                }
            }
        }
        (p, ready)
    });

    let payload = vec![0xABu8; msg_size];

    b.iter(|| {
        rt.block_on(async {
            // Drop-tolerant recv: PUB on contended runners can overrun the
            // per-peer HWM under tight back-to-back bursts and drop messages
            // (RFC 29 PUB/SUB semantics). The bench measures PUB throughput,
            // so we don't need every message to arrive — only that SUB
            // unblocks when the burst ends. Recv up to BATCH, bail on a
            // 200 ms quiet period.
            let sub_handles: Vec<_> = subs
                .drain(..)
                .map(|mut s| {
                    tokio::spawn(async move {
                        for _ in 0..BATCH_SIZE {
                            match tokio::time::timeout(Duration::from_millis(200), s.recv()).await {
                                Ok(Ok(m)) => {
                                    black_box(m);
                                }
                                Ok(Err(e)) => panic!("sub recv error: {e:?}"),
                                Err(_) => break,
                            }
                        }
                        s
                    })
                })
                .collect();

            for _ in 0..BATCH_SIZE {
                pub_sock
                    .send(ZmqMessage::from(payload.clone()))
                    .await
                    .expect("pub send");
            }

            for h in sub_handles {
                subs.push(h.await.expect("sub task"));
            }
        });
    });

    drop(pub_sock);
    drop(subs);
}

// ── rustzmq2: dealer/router ──────────────────────────────────────────────────

/// Registers the rustzmq2 DEALER↔ROUTER pipelined throughput benches.
///
/// One criterion group per transport, named
/// `zmqrs/throughput/dealer_router/<transport>`, with one benchmark per entry
/// of `PIPELINE_SIZES`. The reported throughput is the payload volume of one
/// batch.
fn bench_zmqrs_dealer_router_pipelined(c: &mut Criterion) {
    let rt = build_rt();

    for transport in ["tcp", "ipc", "inproc"] {
        let group_name = format!("zmqrs/throughput/dealer_router/{}", transport);
        let mut group = c.benchmark_group(group_name);
        group
            .sample_size(10)
            .measurement_time(Duration::from_secs(10))
            .warm_up_time(Duration::from_secs(2));

        for msg_size in PIPELINE_SIZES.iter().copied() {
            let bytes_per_iter = (BATCH_SIZE as u64) * (msg_size as u64);
            group.throughput(Throughput::Bytes(bytes_per_iter));

            let id = BenchmarkId::from_parameter(msg_size);
            group.bench_with_input(id, &msg_size, |bencher, size| {
                bench_zmqrs_dealer_router_one(bencher, &rt, *size, transport);
            });
        }
        group.finish();
    }
}

fn bench_zmqrs_dealer_router_one(
    b: &mut criterion::Bencher<'_>,
    rt: &Runtime,
    msg_size: usize,
    transport: &str,
) {
    let endpoint = match transport {
        "tcp" => "tcp://127.0.0.1:0".to_string(),
        "ipc" => ipc_path(&format!("zmqrs-dr-{}", msg_size)),
        "inproc" => format!(
            "inproc://bench-tput-zmqrs-dr-{}-{}",
            std::process::id(),
            msg_size
        ),
        _ => unreachable!(),
    };
    let (dealer, router) = rt.block_on(async {
        // Raise HWM above BATCH_SIZE so the pipeline can fit one full batch
        // in flight without blocking; otherwise a classic lockstep deadlock
        // hits when both dealer and router outbound queues fill.
        let mut r = RouterSocket::builder()
            .send_hwm(BATCH_SIZE * 4)
            .receive_hwm(BATCH_SIZE * 4)
            .build();
        let bound = r.bind(&endpoint).await.expect("router bind").to_string();
        let mut d = DealerSocket::builder()
            .send_hwm(BATCH_SIZE * 4)
            .receive_hwm(BATCH_SIZE * 4)
            .build();
        d.connect(bound.as_str()).await.expect("dealer connect");
        tokio::time::sleep(Duration::from_millis(50)).await;
        (d, r)
    });
    let mut dealer = Some(dealer);
    let mut router = Some(router);

    let payload = vec![0xCDu8; msg_size];

    b.iter(|| {
        let mut owned_router = router.take().unwrap();
        let mut owned_dealer = dealer.take().unwrap();
        rt.block_on(async {
            let router_task = tokio::spawn(async move {
                for _ in 0..BATCH_SIZE {
                    let m = owned_router.recv().await.expect("router recv");
                    owned_router.send(m).await.expect("router send");
                }
                owned_router
            });

            for _ in 0..BATCH_SIZE {
                owned_dealer
                    .send(ZmqMessage::from(payload.clone()))
                    .await
                    .expect("dealer send");
            }
            for _ in 0..BATCH_SIZE {
                let got = owned_dealer.recv().await.expect("dealer recv");
                black_box(got);
            }

            let owned_router = router_task.await.expect("router task");
            router.replace(owned_router);
            dealer.replace(owned_dealer);
        });
    });

    drop(dealer);
    drop(router);
}

// ── libzmq: pub fanout ───────────────────────────────────────────────────────
//
// libzmq is blocking. Each subscriber lives on its own thread with a pair
// of channels: the bench driver tells the sub thread how many messages to
// drain this iteration, the sub thread signals back when done. The pub
// side blasts BATCH_SIZE messages in the driver thread, then waits for
// every sub thread to confirm. This matches the rustzmq2 shape
// (concurrent fanout reception) while respecting libzmq's sync API.

/// Registers the libzmq PUB→SUB fan-out throughput benches.
///
/// Groups are named `libzmq/throughput/pub_fanout/<transport>/subs=<n>` and
/// contain one benchmark per entry of `PIPELINE_SIZES`. Only `tcp` and `ipc`
/// are exercised here; the rustzmq2 counterpart `bench_zmqrs_pub_pipelined`
/// additionally runs `inproc`.
fn bench_libzmq_pub_pipelined(c: &mut Criterion) {
    for transport in ["tcp", "ipc"] {
        for n_subs in SUB_COUNTS.iter().copied() {
            let group_name = format!(
                "libzmq/throughput/pub_fanout/{}/subs={}",
                transport, n_subs
            );
            let mut group = c.benchmark_group(group_name);
            group
                .sample_size(10)
                .measurement_time(Duration::from_secs(10))
                .warm_up_time(Duration::from_secs(2));

            for msg_size in PIPELINE_SIZES.iter().copied() {
                let bytes_per_sub = (BATCH_SIZE as u64) * (msg_size as u64);
                let bytes_per_iter = bytes_per_sub * (n_subs as u64);
                group.throughput(Throughput::Bytes(bytes_per_iter));

                let id = BenchmarkId::from_parameter(msg_size);
                group.bench_with_input(id, &msg_size, |bencher, size| {
                    bench_libzmq_pub_pipelined_one(bencher, n_subs, *size, transport);
                });
            }
            group.finish();
        }
    }
}

/// Runs one libzmq PUB→SUB fan-out throughput benchmark.
///
/// Setup (unmeasured): binds a PUB socket on `transport`, starts `n_subs`
/// subscriber threads (each owning one SUB socket and driven through a pair
/// of channels), then runs a two-phase handshake: publish 1-byte messages
/// until every subscriber has received one, then have every subscriber drain
/// leftovers.
///
/// Measured: each iteration tells every subscriber to receive up to
/// `BATCH_SIZE` messages, publishes `BATCH_SIZE` payloads of `msg_size`
/// bytes from the calling thread, and waits for every subscriber's ack.
///
/// On exit the command channels are closed and the subscriber threads are
/// joined, so repeated invocations do not accumulate detached threads.
///
/// # Panics
///
/// Panics if `transport` is not `"tcp"`, `"ipc"` or `"inproc"`, on any socket
/// or channel error, if a subscriber thread exits during the handshake, or if
/// a handshake phase exceeds its deadline (30 s for phase 1, 10 s for
/// phase 2).
fn bench_libzmq_pub_pipelined_one(
    b: &mut criterion::Bencher<'_>,
    n_subs: usize,
    msg_size: usize,
    transport: &str,
) {
    let endpoint = match transport {
        "tcp" => "tcp://127.0.0.1:0".to_string(),
        "ipc" => format!(
            "ipc:///tmp/libzmq-tput-pub-{}-{}-{}-{}.sock",
            std::process::id(),
            IPC_SEQ.fetch_add(1, Ordering::Relaxed),
            n_subs,
            msg_size
        ),
        "inproc" => format!(
            "inproc://libzmq-tput-pub-{}-{}-{}",
            std::process::id(),
            n_subs,
            msg_size
        ),
        _ => unreachable!(),
    };

    let ctx = zmq2::Context::new();
    let pub_sock = ctx.socket(zmq2::PUB).expect("pub socket");
    // Raise SNDHWM so PUB doesn't drop aggressively. libzmq PUB drops
    // silently on HWM (RFC 29); under criterion's back-to-back b.iter
    // loop even 4×BATCH overflowed within a few tens of iters on
    // contended runners. 16× reduces the drop rate but does not
    // eliminate it — the drop-tolerant SUB recv loop below is what
    // actually makes the bench robust.
    let hwm = (BATCH_SIZE * 16) as i32;
    pub_sock.set_sndhwm(hwm).expect("pub send_hwm");
    pub_sock.bind(&endpoint).expect("bind");
    let bound = pub_sock
        .get_last_endpoint()
        .expect("last_endpoint")
        .unwrap();

    /// Driver-side handle to one subscriber thread.
    struct SubHandle {
        /// Command channel into the subscriber thread.
        tx_drive: mpsc::Sender<Option<usize>>,
        /// Ack channel: one `()` per completed command (plus one at startup).
        rx_done: mpsc::Receiver<()>,
        /// Join handle, joined during teardown.
        worker: thread::JoinHandle<()>,
    }

    // Sub thread protocol. `rx_drive` is the command channel; every command
    // is answered with exactly one `()` on `tx_done`:
    //   None    → handshake: block until one message arrives, proving the
    //             subscription is live on this link.
    //   Some(0) → drain: consume queued leftovers using a 100 ms receive
    //             timeout until the socket goes quiet.
    //   Some(n) → measured batch: receive up to `n` messages, giving up
    //             after a 200 ms quiet period (drop-tolerant).
    // The thread also sends one `()` right after subscribing, before it
    // reads its first command.
    let mut subs: Vec<SubHandle> = Vec::with_capacity(n_subs);
    for _ in 0..n_subs {
        let ctx2 = ctx.clone();
        let bound2 = bound.clone();
        let (tx_drive, rx_drive) = mpsc::channel::<Option<usize>>();
        let (tx_done, rx_done) = mpsc::channel::<()>();
        let t = thread::spawn(move || {
            let sub = ctx2.socket(zmq2::SUB).expect("sub socket");
            sub.set_rcvhwm((BATCH_SIZE * 16) as i32)
                .expect("sub receive_hwm");
            sub.connect(&bound2).expect("sub connect");
            sub.set_subscribe(b"").expect("subscribe");
            tx_done.send(()).ok();
            while let Ok(cmd) = rx_drive.recv() {
                match cmd {
                    None => {
                        // Handshake recv: block until the first message
                        // arrives. The ack sent after this match tells the
                        // driver this link is live.
                        let _ = sub.recv_bytes(0).expect("sub handshake recv");
                    }
                    Some(0) => {
                        // Drain phase: the driver has stopped publishing
                        // handshake bytes. Consume whatever is still queued,
                        // treating 100 ms of silence as "empty".
                        sub.set_rcvtimeo(100)
                            .expect("set receive_timeout for drain");
                        loop {
                            match sub.recv_bytes(0) {
                                Ok(_) => {}
                                Err(zmq2::Error::EAGAIN) => break,
                                Err(e) => panic!("sub drain: {:?}", e),
                            }
                        }
                        sub.set_rcvtimeo(-1).expect("clear receive_timeout");
                    }
                    Some(n) => {
                        // Drop-tolerant recv loop. libzmq PUB drops silently
                        // on HWM — and 1024 tightly-packed back-to-back
                        // publishes under criterion can overrun even a
                        // 16×BATCH HWM after ~60 iters on contended
                        // runners. Set a short receive_timeout so we don't block
                        // forever if a few messages got dropped; ack once
                        // we've either received `n` or seen quiet.
                        //
                        // 200 ms quiet-period is chosen >> typical per-msg
                        // gap (µs on loopback) but << criterion measurement
                        // window, so drops cost a few ms per iter, not a
                        // whole shard timeout.
                        sub.set_rcvtimeo(200)
                            .expect("set receive_timeout for batch recv");
                        for _ in 0..n {
                            // The bench measures PUB throughput, so the
                            // number actually received is not tracked.
                            match sub.recv_bytes(0) {
                                Ok(_) => {}
                                Err(zmq2::Error::EAGAIN) => break,
                                Err(e) => panic!("sub recv failed: {e:?}"),
                            }
                        }
                        sub.set_rcvtimeo(-1).expect("clear receive_timeout");
                    }
                }
                if tx_done.send(()).is_err() {
                    break;
                }
            }
        });
        // Wait for the startup ack so the SUB has connected and subscribed
        // before the next one is spawned.
        rx_done.recv().expect("sub ready");
        subs.push(SubHandle {
            tx_drive,
            rx_done,
            worker: t,
        });
    }

    // Deterministic sync handshake (replaces an earlier fixed sleep that was
    // racy on slow runners).
    //
    // Phase 1: drive every sub into `None` (handshake-recv) mode and publish
    // 1-byte messages until each has received one. At that point the
    // subscription filter has provably reached PUB on every link.
    //
    // `pending_ack[i]` tracks whether sub i still owes us its phase-1 ack.
    // Subs ack in whatever order the OS scheduler delivers them, so every
    // round scans the full vector instead of assuming ordered acks.
    for s in &subs {
        s.tx_drive
            .send(None)
            .expect("drive sub (handshake phase 1)");
    }
    let handshake_timeout = std::time::Instant::now() + Duration::from_secs(30);
    let mut pending_ack = vec![true; n_subs];
    let mut remaining = n_subs;
    while remaining > 0 {
        if std::time::Instant::now() > handshake_timeout {
            panic!("libzmq pub→sub subscription handshake phase 1 timed out after 30s");
        }
        pub_sock
            .send(&[0xFFu8][..], 0)
            .expect("pub send (handshake)");
        thread::sleep(Duration::from_millis(5));
        for (i, s) in subs.iter().enumerate() {
            if !pending_ack[i] {
                continue;
            }
            match s.rx_done.try_recv() {
                Ok(()) => {
                    pending_ack[i] = false;
                    remaining -= 1;
                }
                Err(mpsc::TryRecvError::Empty) => {}
                Err(mpsc::TryRecvError::Disconnected) => {
                    panic!("sub thread exited during handshake phase 1")
                }
            }
        }
    }
    // Phase 2: driver has stopped publishing handshake bytes. Ask every sub
    // to drain any leftover messages sitting in its recv buffer (handshake
    // publishes that landed between our ack-check cycles) and ack when done.
    // After this the next `Some(BATCH_SIZE)` drive starts from a clean slate.
    for s in &subs {
        s.tx_drive
            .send(Some(0))
            .expect("drive sub (handshake phase 2: drain)");
    }
    let drain_timeout = std::time::Instant::now() + Duration::from_secs(10);
    for s in &subs {
        let mut acked = false;
        while !acked {
            if std::time::Instant::now() > drain_timeout {
                panic!("libzmq pub→sub handshake phase 2 timed out after 10s");
            }
            match s.rx_done.recv_timeout(Duration::from_millis(250)) {
                Ok(()) => acked = true,
                Err(mpsc::RecvTimeoutError::Timeout) => {}
                Err(mpsc::RecvTimeoutError::Disconnected) => {
                    panic!("sub thread exited during handshake phase 2")
                }
            }
        }
    }

    let payload: Vec<u8> = vec![0xABu8; msg_size];

    b.iter(|| {
        for s in &subs {
            s.tx_drive.send(Some(BATCH_SIZE)).expect("drive sub");
        }
        for _ in 0..BATCH_SIZE {
            pub_sock.send(&payload[..], 0).expect("pub send");
        }
        for s in &subs {
            s.rx_done.recv().expect("sub done");
        }
    });

    // Teardown. Close every command channel first so all sub threads leave
    // their `rx_drive.recv()` loop concurrently, then join them so each SUB
    // socket is closed before this function returns. The join result is
    // ignored, matching the router thread in the dealer/router bench.
    let workers: Vec<thread::JoinHandle<()>> = subs
        .into_iter()
        .map(|s| {
            let SubHandle {
                tx_drive,
                rx_done,
                worker,
            } = s;
            drop(tx_drive);
            drop(rx_done);
            worker
        })
        .collect();
    for worker in workers {
        worker.join().ok();
    }
}

// ── libzmq: dealer/router ────────────────────────────────────────────────────
//
// Router thread blocks on recv_multipart, echoes every message back.
// Dealer lives in the driver thread, sends BATCH_SIZE then drains
// BATCH_SIZE — matches the rustzmq2 shape.

/// Registers the libzmq DEALER↔ROUTER pipelined throughput benches.
///
/// One criterion group per transport, named
/// `libzmq/throughput/dealer_router/<transport>`, with one benchmark per
/// entry of `PIPELINE_SIZES`. The reported throughput is the payload volume
/// of one batch.
fn bench_libzmq_dealer_router_pipelined(c: &mut Criterion) {
    for transport in ["tcp", "ipc", "inproc"] {
        let group_name = format!("libzmq/throughput/dealer_router/{}", transport);
        let mut group = c.benchmark_group(group_name);
        group
            .sample_size(10)
            .measurement_time(Duration::from_secs(10))
            .warm_up_time(Duration::from_secs(2));

        for msg_size in PIPELINE_SIZES.iter().copied() {
            let bytes_per_iter = (BATCH_SIZE as u64) * (msg_size as u64);
            group.throughput(Throughput::Bytes(bytes_per_iter));

            let id = BenchmarkId::from_parameter(msg_size);
            group.bench_with_input(id, &msg_size, |bencher, size| {
                bench_libzmq_dealer_router_one(bencher, *size, transport);
            });
        }
        group.finish();
    }
}

fn bench_libzmq_dealer_router_one(
    b: &mut criterion::Bencher<'_>,
    msg_size: usize,
    transport: &str,
) {
    let endpoint = match transport {
        "tcp" => "tcp://127.0.0.1:0".to_string(),
        "ipc" => format!(
            "ipc:///tmp/libzmq-tput-dr-{}-{}-{}.sock",
            std::process::id(),
            IPC_SEQ.fetch_add(1, Ordering::Relaxed),
            msg_size
        ),
        "inproc" => format!(
            "inproc://libzmq-tput-dr-{}-{}",
            std::process::id(),
            msg_size
        ),
        _ => unreachable!(),
    };

    let ctx = zmq2::Context::new();
    let router_sock = ctx.socket(zmq2::ROUTER).expect("router socket");
    // Match rustzmq2 bench: HWM large enough to fit a full pipeline and
    // avoid the classic dealer↔router lockstep deadlock on BATCH_SIZE > HWM.
    let hwm = (BATCH_SIZE * 4) as i32;
    router_sock.set_sndhwm(hwm).expect("router send_hwm");
    router_sock.set_rcvhwm(hwm).expect("router receive_hwm");
    router_sock.bind(&endpoint).expect("router bind");
    let bound = router_sock
        .get_last_endpoint()
        .expect("last_endpoint")
        .unwrap();

    router_sock.set_rcvtimeo(100).expect("set receive_timeout");
    let stop = Arc::new(AtomicBool::new(false));
    let stop_t = stop.clone();
    let router_thread = thread::spawn(move || loop {
        match router_sock.recv_multipart(0) {
            Ok(parts) => {
                if router_sock.send_multipart(&parts, 0).is_err() {
                    break;
                }
            }
            Err(zmq2::Error::EAGAIN) => {
                if stop_t.load(Ordering::Relaxed) {
                    break;
                }
            }
            Err(_) => break,
        }
    });

    let dealer_sock = ctx.socket(zmq2::DEALER).expect("dealer socket");
    dealer_sock.set_sndhwm(hwm).expect("dealer send_hwm");
    dealer_sock.set_rcvhwm(hwm).expect("dealer receive_hwm");
    dealer_sock.connect(&bound).expect("dealer connect");
    thread::sleep(Duration::from_millis(50));

    let payload: Vec<u8> = vec![0xCDu8; msg_size];

    b.iter(|| {
        for _ in 0..BATCH_SIZE {
            dealer_sock.send(&payload[..], 0).expect("dealer send");
        }
        for _ in 0..BATCH_SIZE {
            let got = dealer_sock.recv_bytes(0).expect("dealer recv");
            black_box(got);
        }
    });

    stop.store(true, Ordering::Relaxed);
    drop(dealer_sock);
    router_thread.join().ok();
}

// Collect the four bench entry points (rustzmq2 and libzmq, PUB fan-out and
// DEALER↔ROUTER) into one criterion group named `benches`, and hand that
// group to `criterion_main!` as the bench entry point.
criterion_group!(
    benches,
    bench_zmqrs_pub_pipelined,
    bench_zmqrs_dealer_router_pipelined,
    bench_libzmq_pub_pipelined,
    bench_libzmq_dealer_router_pipelined,
);
criterion_main!(benches);