use crate::{Error, Request, Response, Result};
use bytes::{Bytes, BytesMut};
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::io::{AsyncReadExt, AsyncWriteExt, BufReader, BufWriter};
use tokio::net::TcpStream;
use tokio::sync::{mpsc, oneshot, Mutex, Semaphore};
use tracing::{debug, info, trace, warn};
/// Map from request correlation id to the oneshot sender that delivers the
/// decoded response (or error) back to the waiting caller. Shared between
/// the client handle, the writer task, and the reader task.
type PendingResponses = Arc<Mutex<HashMap<u64, oneshot::Sender<Result<Response>>>>>;
/// Tuning knobs for a pipelined client connection.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
    /// Maximum number of requests awaiting a response at once
    /// (enforced by a semaphore in `send_request`).
    pub max_in_flight: usize,
    /// How long (microseconds) the writer may wait for more requests before
    /// flushing a partially filled batch. 0 disables lingering entirely.
    pub batch_linger_us: u64,
    /// Maximum number of requests written per batch flush.
    pub max_batch_size: usize,
    /// Capacity of the buffered reader wrapping the socket's read half.
    pub read_buffer_size: usize,
    /// Capacity of the buffered writer wrapping the socket's write half.
    pub write_buffer_size: usize,
    /// Per-request response deadline; also reused as the TCP connect timeout.
    pub request_timeout: Duration,
    /// How long `close()` waits for in-flight responses to drain.
    pub close_timeout: Duration,
    /// Optional TLS settings (only available with the `tls` feature).
    #[cfg(feature = "tls")]
    pub tls: Option<PipelineTlsConfig>,
    /// Optional SCRAM credentials, applied right after the handshake.
    pub auth: Option<PipelineAuthConfig>,
}
/// TLS settings for a pipelined connection (requires the `tls` feature).
#[cfg(feature = "tls")]
#[derive(Debug, Clone)]
pub struct PipelineTlsConfig {
    /// Configuration handed to `rivven_core::tls::TlsConnector::new`.
    pub tls_config: rivven_core::tls::TlsConfig,
    /// Server name passed to the TLS connector during the handshake.
    pub server_name: String,
}
/// SCRAM username/password credentials.
///
/// `Debug` is deliberately not derived: a manual impl redacts the password
/// so credentials cannot leak through `{:?}` formatting.
#[derive(Clone)]
pub struct PipelineAuthConfig {
    pub username: String,
    // Plaintext password; never printed (see the manual Debug impl).
    pub password: String,
}
impl std::fmt::Debug for PipelineAuthConfig {
    /// Shows the username but replaces the password with `[REDACTED]` so
    /// credentials never leak into logs via `{:?}`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut dbg = f.debug_struct("PipelineAuthConfig");
        dbg.field("username", &self.username);
        dbg.field("password", &"[REDACTED]");
        dbg.finish()
    }
}
impl Default for PipelineConfig {
    /// Balanced defaults: 100 in-flight, 1 ms linger, 64-entry batches,
    /// 64 KiB buffers, 30 s request timeout, 5 s close timeout, no TLS/auth.
    fn default() -> Self {
        let buffer_bytes = 64 * 1024;
        Self {
            max_in_flight: 100,
            batch_linger_us: 1000,
            max_batch_size: 64,
            read_buffer_size: buffer_bytes,
            write_buffer_size: buffer_bytes,
            request_timeout: Duration::from_secs(30),
            close_timeout: Duration::from_secs(5),
            #[cfg(feature = "tls")]
            tls: None,
            auth: None,
        }
    }
}
impl PipelineConfig {
    /// Starts a fluent builder seeded with the default configuration.
    pub fn builder() -> PipelineConfigBuilder {
        PipelineConfigBuilder::default()
    }

    /// Preset tuned for throughput: a deep pipeline, long linger window,
    /// large batches and large socket buffers.
    pub fn high_throughput() -> Self {
        Self {
            max_in_flight: 1000,
            batch_linger_us: 5000,
            max_batch_size: 256,
            read_buffer_size: 256 * 1024,
            write_buffer_size: 256 * 1024,
            request_timeout: Duration::from_secs(60),
            close_timeout: Duration::from_secs(10),
            ..Self::default()
        }
    }

    /// Preset tuned for latency: a shallow pipeline, no batching delay,
    /// single-request batches and small buffers.
    pub fn low_latency() -> Self {
        Self {
            max_in_flight: 32,
            batch_linger_us: 0,
            max_batch_size: 1,
            read_buffer_size: 16 * 1024,
            write_buffer_size: 16 * 1024,
            request_timeout: Duration::from_secs(10),
            close_timeout: Duration::from_secs(3),
            ..Self::default()
        }
    }
}
/// Fluent builder for `PipelineConfig`, obtained via `PipelineConfig::builder()`.
#[derive(Default)]
pub struct PipelineConfigBuilder {
    // Accumulates settings on top of `PipelineConfig::default()`.
    config: PipelineConfig,
}
impl PipelineConfigBuilder {
    /// Caps how many requests may await responses at once.
    pub fn max_in_flight(mut self, limit: usize) -> Self {
        self.config.max_in_flight = limit;
        self
    }

    /// Sets the batch linger window in milliseconds.
    pub fn batch_linger_ms(mut self, millis: u64) -> Self {
        self.config.batch_linger_us = millis * 1000;
        self
    }

    /// Sets the batch linger window in microseconds.
    pub fn batch_linger_us(mut self, micros: u64) -> Self {
        self.config.batch_linger_us = micros;
        self
    }

    /// Sets the maximum number of requests per batch flush.
    pub fn max_batch_size(mut self, count: usize) -> Self {
        self.config.max_batch_size = count;
        self
    }

    /// Sets the buffered-reader capacity in bytes.
    pub fn read_buffer_size(mut self, bytes: usize) -> Self {
        self.config.read_buffer_size = bytes;
        self
    }

    /// Sets the buffered-writer capacity in bytes.
    pub fn write_buffer_size(mut self, bytes: usize) -> Self {
        self.config.write_buffer_size = bytes;
        self
    }

    /// Sets the per-request response deadline.
    pub fn request_timeout(mut self, deadline: Duration) -> Self {
        self.config.request_timeout = deadline;
        self
    }

    /// Enables TLS with the given config and server name (requires the
    /// `tls` feature).
    #[cfg(feature = "tls")]
    pub fn tls(
        mut self,
        tls_config: rivven_core::tls::TlsConfig,
        server_name: impl Into<String>,
    ) -> Self {
        self.config.tls = Some(PipelineTlsConfig {
            tls_config,
            server_name: server_name.into(),
        });
        self
    }

    /// Sets SCRAM credentials to be used right after the handshake.
    pub fn auth(mut self, username: impl Into<String>, password: impl Into<String>) -> Self {
        self.config.auth = Some(PipelineAuthConfig {
            username: username.into(),
            password: password.into(),
        });
        self
    }

    /// Sets how long `close()` waits for in-flight responses to drain.
    pub fn close_timeout(mut self, deadline: Duration) -> Self {
        self.config.close_timeout = deadline;
        self
    }

    /// Finalizes and returns the configuration.
    pub fn build(self) -> PipelineConfig {
        self.config
    }
}
/// A single serialized request queued for the background writer task.
struct PipelinedRequest {
    /// Correlation id; also embedded in the first 8 bytes of `data`.
    id: u64,
    /// Wire frame body: 8-byte big-endian id followed by the encoded request
    /// (the writer task adds the 4-byte length prefix).
    data: Bytes,
    /// Delivers the matching response (or error) back to the caller.
    response_tx: oneshot::Sender<Result<Response>>,
    // Creation timestamp; currently unused (kept for future latency metrics).
    #[allow(dead_code)]
    created_at: Instant,
}
/// A client that pipelines many concurrent requests over one connection,
/// batching writes in a background task and demultiplexing responses by
/// correlation id. Cloning is cheap: all clones share the same `Arc` state.
pub struct PipelinedClient {
    inner: Arc<PipelinedClientInner>,
}
struct PipelinedClientInner {
request_tx: mpsc::Sender<PipelinedRequest>,
in_flight_semaphore: Arc<Semaphore>,
next_request_id: AtomicU64,
config: PipelineConfig,
stats: Arc<PipelineStats>,
shutdown: tokio::sync::watch::Sender<bool>,
pending_responses: Arc<Mutex<HashMap<u64, oneshot::Sender<Result<Response>>>>>,
}
impl Clone for PipelinedClient {
fn clone(&self) -> Self {
Self {
inner: Arc::clone(&self.inner),
}
}
}
impl PipelinedClient {
/// Connects to `addr` and returns a ready-to-use pipelined client.
///
/// Steps: TCP connect (bounded by `config.request_timeout`), enable
/// `TCP_NODELAY`, optionally wrap the stream in TLS (when the `tls`
/// feature is on and `config.tls` is set), spawn the reader/writer tasks,
/// run the protocol handshake, then SCRAM-authenticate if credentials
/// were configured.
pub async fn connect(addr: &str, config: PipelineConfig) -> Result<Self> {
    // `config` is moved into `setup_pipeline` below, so copy out what the
    // post-setup steps need first.
    let auth_config = config.auth.clone();
    let connect_timeout = config.request_timeout;
    let stream = tokio::time::timeout(connect_timeout, TcpStream::connect(addr))
        .await
        .map_err(|_| Error::Timeout)?
        .map_err(|e| Error::ConnectionError(e.to_string()))?;
    // Disable Nagle so small batched frames are written out immediately.
    stream
        .set_nodelay(true)
        .map_err(|e| Error::ConnectionError(format!("Failed to set TCP_NODELAY: {}", e)))?;
    #[cfg(feature = "tls")]
    if let Some(tls_cfg) = &config.tls {
        let connector = rivven_core::tls::TlsConnector::new(&tls_cfg.tls_config)
            .map_err(|e| Error::ConnectionError(format!("TLS config error: {e}")))?;
        let tls_stream = connector
            .connect(stream, &tls_cfg.server_name)
            .await
            .map_err(|e| Error::ConnectionError(format!("TLS handshake error: {e}")))?;
        // TLS streams don't offer `into_split`, so use `tokio::io::split`
        // and return early from this branch.
        let (read_half, write_half) = tokio::io::split(tls_stream);
        let client = Self::setup_pipeline(addr, config, read_half, write_half).await?;
        Self::pipeline_handshake(&client).await?;
        if let Some(auth) = &auth_config {
            Self::pipeline_authenticate(&client, &auth.username, &auth.password).await?;
        }
        return Ok(client);
    }
    // Plain-TCP path (the only path when the `tls` feature is disabled).
    let (read_half, write_half) = stream.into_split();
    let client = Self::setup_pipeline(addr, config, read_half, write_half).await?;
    Self::pipeline_handshake(&client).await?;
    if let Some(auth) = &auth_config {
        Self::pipeline_authenticate(&client, &auth.username, &auth.password).await?;
    }
    Ok(client)
}
/// Wires up the shared pipeline state and spawns the background writer and
/// reader tasks over the given stream halves.
async fn setup_pipeline<R, W>(
    _addr: &str,
    config: PipelineConfig,
    read_half: R,
    write_half: W,
) -> Result<Self>
where
    R: tokio::io::AsyncRead + Unpin + Send + 'static,
    W: tokio::io::AsyncWrite + Unpin + Send + 'static,
{
    // Channel to the writer task, sized at 2x the in-flight window so
    // senders rarely block on the channel itself.
    let (request_tx, request_rx) = mpsc::channel(config.max_in_flight * 2);
    let in_flight_semaphore = Arc::new(Semaphore::new(config.max_in_flight));
    let pending_responses = Arc::new(Mutex::new(HashMap::new()));
    let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false);
    let stats = Arc::new(PipelineStats::new());

    // Writer half: batches queued requests onto the socket.
    {
        let cfg = config.clone();
        let pending = Arc::clone(&pending_responses);
        let shutdown = shutdown_rx.clone();
        let task_stats = Arc::clone(&stats);
        tokio::spawn(async move {
            writer_task(write_half, request_rx, pending, cfg, shutdown, task_stats).await;
        });
    }

    // Reader half: demultiplexes responses back to waiting callers.
    {
        let cfg = config.clone();
        let pending = Arc::clone(&pending_responses);
        tokio::spawn(async move {
            reader_task(read_half, pending, cfg, shutdown_rx).await;
        });
    }

    Ok(Self {
        inner: Arc::new(PipelinedClientInner {
            request_tx,
            in_flight_semaphore,
            next_request_id: AtomicU64::new(1),
            config,
            stats,
            shutdown: shutdown_tx,
            pending_responses,
        }),
    })
}
/// Gracefully closes the pipeline: waits (up to `close_timeout`) for
/// registered in-flight responses to drain, then signals the background
/// tasks to shut down.
///
/// NOTE(review): only requests already flushed by the writer appear in
/// `pending_responses`; requests still queued in the writer channel are
/// not counted by this drain loop — confirm whether that is acceptable.
pub async fn close(&self) {
    let deadline = tokio::time::Instant::now() + self.inner.config.close_timeout;
    loop {
        let in_flight = self.inner.pending_responses.lock().await.len();
        if in_flight == 0 {
            break;
        }
        if tokio::time::Instant::now() >= deadline {
            tracing::warn!(
                pending = in_flight,
                "Pipeline close() timed out waiting for in-flight responses"
            );
            break;
        }
        tokio::time::sleep(Duration::from_millis(25)).await;
    }
    // Tell both the writer and reader tasks to stop.
    let _ = self.inner.shutdown.send(true);
}
async fn pipeline_handshake(client: &PipelinedClient) -> Result<()> {
let response = client
.send_request(Request::Handshake {
protocol_version: rivven_protocol::PROTOCOL_VERSION,
client_id: format!("pipeline-{}", std::process::id()),
})
.await?;
match response {
Response::HandshakeResult {
compatible,
server_version,
message: _,
} => {
if compatible {
info!(
"Pipeline handshake OK (client v{}, server v{})",
rivven_protocol::PROTOCOL_VERSION,
server_version
);
Ok(())
} else {
Err(Error::ProtocolError(
rivven_protocol::ProtocolError::VersionMismatch {
expected: rivven_protocol::PROTOCOL_VERSION,
actual: server_version,
},
))
}
}
Response::Error { message } => {
warn!(
"Server returned error on pipeline handshake: {}, proceeding anyway",
message
);
Ok(())
}
_ => {
warn!("Server did not return HandshakeResult, proceeding without version check");
Ok(())
}
}
}
/// Authenticates over the pipeline using a SCRAM (RFC 5802-style) exchange.
///
/// Flow: client-first -> server-first (combined nonce, salt, iterations)
/// -> client-final (proof) -> server-final (server signature). The server's
/// nonce continuation and signature are both verified, so each side proves
/// knowledge of the password-derived keys.
async fn pipeline_authenticate(
    client: &PipelinedClient,
    username: &str,
    password: &str,
) -> Result<()> {
    use crate::client::{
        base64_decode, base64_encode, escape_username, generate_nonce, parse_server_first,
        pbkdf2_sha256, sha256, xor_bytes,
    };
    use rivven_core::PasswordHash;
    let client_nonce = generate_nonce();
    // "n,," is the GS2 header meaning "no channel binding".
    let client_first_bare = format!("n={},r={}", escape_username(username), client_nonce);
    let client_first = format!("n,,{}", client_first_bare);
    let response = client
        .send_request(Request::ScramClientFirst {
            message: Bytes::from(client_first.clone()),
        })
        .await?;
    let server_first = match response {
        Response::ScramServerFirst { message } => String::from_utf8(message.to_vec())
            .map_err(|_| Error::AuthenticationFailed("Invalid server-first encoding".into()))?,
        Response::Error { message } => return Err(Error::AuthenticationFailed(message)),
        _ => return Err(Error::InvalidResponse),
    };
    let (combined_nonce, salt_b64, iterations) = parse_server_first(&server_first)?;
    // The server must echo our nonce as a prefix of the combined nonce,
    // binding its reply to this exchange.
    if !combined_nonce.starts_with(&client_nonce) {
        return Err(Error::AuthenticationFailed("Server nonce mismatch".into()));
    }
    let salt = base64_decode(&salt_b64)
        .map_err(|_| Error::AuthenticationFailed("Invalid salt encoding".into()))?;
    // SaltedPassword := PBKDF2-SHA256(password, salt, iterations)
    let salted_password = pbkdf2_sha256(password.as_bytes(), &salt, iterations);
    // ClientKey := HMAC(SaltedPassword, "Client Key"); StoredKey := SHA-256(ClientKey)
    let client_key = PasswordHash::hmac_sha256(&salted_password, b"Client Key");
    let stored_key = sha256(&client_key);
    // "c=biws" is base64("n,,") — the same GS2 header sent in client-first.
    let client_final_without_proof = format!("c=biws,r={}", combined_nonce);
    let auth_message = format!(
        "{},{},{}",
        client_first_bare, server_first, client_final_without_proof
    );
    // ClientProof := ClientKey XOR HMAC(StoredKey, AuthMessage)
    let client_signature = PasswordHash::hmac_sha256(&stored_key, auth_message.as_bytes());
    let client_proof = xor_bytes(&client_key, &client_signature);
    let client_final = format!(
        "{},p={}",
        client_final_without_proof,
        base64_encode(&client_proof)
    );
    let response = client
        .send_request(Request::ScramClientFinal {
            message: Bytes::from(client_final),
        })
        .await?;
    match response {
        Response::ScramServerFinal { message, .. } => {
            let verifier = String::from_utf8(message.to_vec())
                .map_err(|_| Error::AuthenticationFailed("Invalid server-final".into()))?;
            // Mutual auth: the server proves knowledge of ServerKey by
            // sending v=HMAC(ServerKey, AuthMessage).
            let server_key = PasswordHash::hmac_sha256(&salted_password, b"Server Key");
            let expected_sig = PasswordHash::hmac_sha256(&server_key, auth_message.as_bytes());
            let expected_verifier = format!("v={}", base64_encode(&expected_sig));
            if verifier != expected_verifier {
                return Err(Error::AuthenticationFailed(
                    "Server signature mismatch".into(),
                ));
            }
            tracing::info!("Pipeline SCRAM auth successful for '{}'", username);
            Ok(())
        }
        Response::Error { message } => Err(Error::AuthenticationFailed(message)),
        _ => Err(Error::InvalidResponse),
    }
}
/// Sends one request through the pipeline and awaits its response.
///
/// Flow: acquire an in-flight permit (backpressure), allocate a correlation
/// id, serialize the request, hand the frame to the writer task, then wait
/// (bounded by `request_timeout`) on a oneshot channel completed by the
/// reader task.
///
/// # Errors
/// - `Error::RequestTooLarge` if the encoded request exceeds `MAX_MESSAGE_SIZE`.
/// - `Error::ConnectionError` if the pipeline or writer task has closed.
/// - `Error::Timeout` if no response arrives within `request_timeout`.
pub async fn send_request(&self, request: Request) -> Result<Response> {
    // The permit is held for the whole call, capping concurrency at
    // `max_in_flight`; it is released on every return path.
    let _permit = self
        .inner
        .in_flight_semaphore
        .acquire()
        .await
        .map_err(|_| Error::ConnectionError("Pipeline closed".into()))?;
    // Relaxed is sufficient: only uniqueness of the id matters.
    let request_id = self.inner.next_request_id.fetch_add(1, Ordering::Relaxed);
    let (response_tx, response_rx) = oneshot::channel();
    let request_bytes = request.to_wire(rivven_protocol::WireFormat::Postcard, 0u32)?;
    if request_bytes.len() > rivven_protocol::MAX_MESSAGE_SIZE {
        return Err(Error::RequestTooLarge(
            request_bytes.len(),
            rivven_protocol::MAX_MESSAGE_SIZE,
        ));
    }
    // Frame body: 8-byte big-endian correlation id + encoded request.
    // (The writer task prepends the 4-byte length prefix.)
    let mut data = BytesMut::with_capacity(8 + request_bytes.len());
    data.extend_from_slice(&request_id.to_be_bytes());
    data.extend_from_slice(&request_bytes);
    let pipelined = PipelinedRequest {
        id: request_id,
        data: data.freeze(),
        response_tx,
        created_at: Instant::now(),
    };
    self.inner
        .request_tx
        .send(pipelined)
        .await
        .map_err(|_| Error::ConnectionError("Writer task closed".into()))?;
    self.inner
        .stats
        .requests_sent
        .fetch_add(1, Ordering::Relaxed);
    let timeout_duration = self.inner.config.request_timeout;
    match tokio::time::timeout(timeout_duration, response_rx).await {
        Ok(Ok(result)) => {
            self.inner
                .stats
                .responses_received
                .fetch_add(1, Ordering::Relaxed);
            result
        }
        Ok(Err(_)) => Err(Error::ConnectionError("Response channel dropped".into())),
        Err(_) => {
            // Drop our pending entry so a late response is discarded.
            // NOTE(review): if the request is still in the writer's batch it
            // gets re-registered on flush; that entry is then cleaned up when
            // the (ignored) response arrives or the connection closes.
            {
                let mut pending = self.inner.pending_responses.lock().await;
                pending.remove(&request_id);
            }
            self.inner.stats.timeouts.fetch_add(1, Ordering::Relaxed);
            Err(Error::Timeout)
        }
    }
}
/// Publishes `value` to `topic` with no key, no explicit partition, and no
/// leader epoch, returning the offset assigned by the server.
pub async fn publish(&self, topic: impl Into<String>, value: impl Into<Bytes>) -> Result<u64> {
    let reply = self
        .send_request(Request::Publish {
            topic: topic.into(),
            partition: None,
            key: None,
            value: value.into(),
            leader_epoch: None,
        })
        .await?;
    match reply {
        Response::Published { offset, .. } => Ok(offset),
        Response::Error { message } => Err(Error::ServerError(message)),
        _ => Err(Error::InvalidResponse),
    }
}
/// Publishes a keyed record to `topic`, returning the assigned offset.
pub async fn publish_with_key(
    &self,
    topic: impl Into<String>,
    key: impl Into<Bytes>,
    value: impl Into<Bytes>,
) -> Result<u64> {
    let reply = self
        .send_request(Request::Publish {
            topic: topic.into(),
            partition: None,
            key: Some(key.into()),
            value: value.into(),
            leader_epoch: None,
        })
        .await?;
    match reply {
        Response::Published { offset, .. } => Ok(offset),
        Response::Error { message } => Err(Error::ServerError(message)),
        _ => Err(Error::InvalidResponse),
    }
}
/// Returns a point-in-time snapshot of the pipeline counters.
pub fn stats(&self) -> PipelineStatsSnapshot {
    let counters = &self.inner.stats;
    PipelineStatsSnapshot {
        requests_sent: counters.requests_sent.load(Ordering::Relaxed),
        responses_received: counters.responses_received.load(Ordering::Relaxed),
        batches_flushed: counters.batches_flushed.load(Ordering::Relaxed),
        timeouts: counters.timeouts.load(Ordering::Relaxed),
    }
}
}
/// Background task that batches queued requests and writes them to the socket.
///
/// With an empty batch it blocks on the request channel (or shutdown); with
/// a non-empty batch it waits at most the remaining `batch_linger_us` for
/// more requests. A batch is flushed once it reaches `max_batch_size` or its
/// linger window expires (immediately when `batch_linger_us == 0`). On exit,
/// any partially collected batch is flushed so queued requests aren't dropped.
async fn writer_task<W: tokio::io::AsyncWrite + Unpin>(
    write_half: W,
    mut request_rx: mpsc::Receiver<PipelinedRequest>,
    pending: PendingResponses,
    config: PipelineConfig,
    mut shutdown: tokio::sync::watch::Receiver<bool>,
    stats: Arc<PipelineStats>,
) {
    let mut writer = BufWriter::with_capacity(config.write_buffer_size, write_half);
    let mut batch: Vec<PipelinedRequest> = Vec::with_capacity(config.max_batch_size);
    // When the first request of the current batch arrived; drives the
    // linger deadline.
    let mut batch_started: Option<Instant> = None;
    loop {
        if *shutdown.borrow() {
            break;
        }
        let request = if batch.is_empty() {
            // Nothing buffered: block until a request or a shutdown signal.
            tokio::select! {
                req = request_rx.recv() => {
                    match req {
                        Some(req) => Some(req),
                        // Channel closed: every sender has been dropped.
                        None => break,
                    }
                }
                _ = shutdown.changed() => {
                    if *shutdown.borrow() {
                        break;
                    }
                    continue;
                }
            }
        } else if config.batch_linger_us == 0 {
            // Lingering disabled: flush what we have immediately.
            None
        } else {
            // Wait for more requests, but only up to the remaining linger.
            let elapsed = batch_started
                .map(|t| t.elapsed().as_micros() as u64)
                .unwrap_or(0);
            let remaining = config.batch_linger_us.saturating_sub(elapsed);
            if remaining == 0 {
                None
            } else {
                match tokio::time::timeout(Duration::from_micros(remaining), request_rx.recv())
                    .await
                {
                    Ok(Some(req)) => Some(req),
                    Ok(None) => break,
                    // Linger expired: fall through to the flush check.
                    Err(_) => None,
                }
            }
        };
        if let Some(req) = request {
            if batch.is_empty() {
                batch_started = Some(Instant::now());
            }
            batch.push(req);
        }
        let should_flush = batch.len() >= config.max_batch_size
            || (!batch.is_empty()
                && batch_started
                    .is_some_and(|t| t.elapsed().as_micros() as u64 >= config.batch_linger_us));
        if should_flush && !batch.is_empty() {
            if let Err(e) = flush_batch(&mut writer, &mut batch, &pending, &stats).await {
                warn!("Failed to flush batch: {}", e);
                // Defensive: fail any requests flush_batch left in the batch
                // (it normally drains everything into `pending` itself).
                for req in batch.drain(..) {
                    let _ = req
                        .response_tx
                        .send(Err(Error::ConnectionError(e.to_string())));
                }
            }
            batch_started = None;
        }
    }
    // Best-effort flush of whatever was collected when shutdown hit.
    if !batch.is_empty() {
        if let Err(e) = flush_batch(&mut writer, &mut batch, &pending, &stats).await {
            tracing::warn!(error = %e, "Failed to flush remaining batch on shutdown");
        }
    }
}
/// Writes one batch of length-prefixed frames and flushes the buffered writer.
///
/// Each request's response sender is registered in `pending` *before* its
/// bytes hit the wire, so a fast response can never race past registration.
/// On any write error, every request in this batch is removed from `pending`
/// and failed with a `ConnectionError`, and a `BrokenPipe` error is returned.
async fn flush_batch<W: tokio::io::AsyncWrite + Unpin>(
    writer: &mut BufWriter<W>,
    batch: &mut Vec<PipelinedRequest>,
    pending: &PendingResponses,
    stats: &PipelineStats,
) -> std::io::Result<()> {
    let batch_count = batch.len();
    let mut request_data: Vec<(u64, bytes::Bytes)> = Vec::with_capacity(batch_count);
    {
        // Register response channels first (see doc comment above).
        let mut pending_guard = pending.lock().await;
        for req in batch.drain(..) {
            pending_guard.insert(req.id, req.response_tx);
            request_data.push((req.id, req.data));
        }
    }
    let write_result: std::io::Result<()> = async {
        for (_id, data) in &request_data {
            // Bug fix: the previous `try_into().unwrap_or(u32::MAX)` would
            // silently write a wrong length prefix for frames larger than
            // u32::MAX bytes, desynchronizing the framing for every later
            // message. Treat oversized frames as a hard connection error.
            let len = u32::try_from(data.len()).map_err(|_| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!("frame too large for u32 length prefix: {} bytes", data.len()),
                )
            })?;
            writer.write_all(&len.to_be_bytes()).await?;
            writer.write_all(data).await?;
        }
        writer.flush().await?;
        Ok(())
    }
    .await;
    if let Err(ref e) = write_result {
        // Fail everything we just registered so callers see the error
        // immediately instead of waiting for their request timeout.
        let mut pending_guard = pending.lock().await;
        for (id, _) in &request_data {
            if let Some(tx) = pending_guard.remove(id) {
                let _ = tx.send(Err(Error::ConnectionError(e.to_string())));
            }
        }
        return Err(std::io::Error::new(
            std::io::ErrorKind::BrokenPipe,
            e.to_string(),
        ));
    }
    trace!("Flushed batch of {} requests", batch_count);
    stats.batches_flushed.fetch_add(1, Ordering::Relaxed);
    Ok(())
}
/// Background task that decodes length-prefixed responses and routes them to
/// waiting callers by correlation id.
///
/// Wire format per response: 4-byte big-endian length (covering the rest of
/// the frame), then an 8-byte big-endian correlation id, then the encoded
/// `Response`. On any framing or I/O error the task exits and fails every
/// still-pending request with a `ConnectionError`.
async fn reader_task<R: tokio::io::AsyncRead + Unpin>(
    read_half: R,
    pending: PendingResponses,
    config: PipelineConfig,
    mut shutdown: tokio::sync::watch::Receiver<bool>,
) {
    let mut reader = BufReader::with_capacity(config.read_buffer_size, read_half);
    let mut len_buf = [0u8; 4];
    let mut id_buf = [0u8; 8];
    loop {
        if *shutdown.borrow() {
            break;
        }
        // Only the length read races shutdown; once a frame has started we
        // read it to completion so the stream stays in sync.
        let read_result = tokio::select! {
            result = reader.read_exact(&mut len_buf) => result,
            _ = shutdown.changed() => {
                if *shutdown.borrow() {
                    break;
                }
                continue;
            }
        };
        if read_result.is_err() {
            // EOF or I/O error: treat the connection as closed.
            break;
        }
        let msg_len = u32::from_be_bytes(len_buf) as usize;
        // Defensive cap against huge allocations from a corrupt/hostile peer.
        const MAX_PIPELINE_RESPONSE_SIZE: usize = 100 * 1024 * 1024;
        if msg_len > MAX_PIPELINE_RESPONSE_SIZE {
            warn!(
                "Pipeline response too large: {} bytes (max {})",
                msg_len, MAX_PIPELINE_RESPONSE_SIZE
            );
            break;
        }
        // A frame must at least contain the 8-byte correlation id.
        if msg_len < 8 {
            warn!(
                "Invalid response length: {} — stream desynchronized, closing connection",
                msg_len
            );
            break;
        }
        if reader.read_exact(&mut id_buf).await.is_err() {
            break;
        }
        let request_id = u64::from_be_bytes(id_buf);
        let body_len = msg_len - 8;
        let mut response_buf = vec![0u8; body_len];
        if reader.read_exact(&mut response_buf).await.is_err() {
            break;
        }
        // Decode errors are delivered to the caller rather than killing the
        // connection: the frame itself was well-formed.
        let result = Response::from_wire(&response_buf)
            .map(|(resp, _format, _correlation_id)| resp)
            .map_err(Error::ProtocolError);
        let sender = {
            let mut pending_guard = pending.lock().await;
            pending_guard.remove(&request_id)
        };
        if let Some(tx) = sender {
            let _ = tx.send(result);
        } else {
            // Caller likely timed out and removed its entry already.
            debug!("Received response for unknown request ID: {}", request_id);
        }
    }
    // Fail everything still outstanding so callers don't hang until timeout.
    let mut pending_guard = pending.lock().await;
    for (_, tx) in pending_guard.drain() {
        let _ = tx.send(Err(Error::ConnectionError("Connection closed".into())));
    }
}
/// Lock-free counters shared by the client handle and its background tasks.
struct PipelineStats {
    /// Requests successfully handed to the writer channel.
    requests_sent: AtomicU64,
    /// Responses delivered back to callers (timeouts excluded).
    responses_received: AtomicU64,
    /// Batch flushes performed by the writer task.
    batches_flushed: AtomicU64,
    /// Requests that hit `request_timeout` before a response arrived.
    timeouts: AtomicU64,
}
impl PipelineStats {
fn new() -> Self {
Self {
requests_sent: AtomicU64::new(0),
responses_received: AtomicU64::new(0),
batches_flushed: AtomicU64::new(0),
timeouts: AtomicU64::new(0),
}
}
}
/// Point-in-time copy of the pipeline counters, safe to hand to callers.
#[derive(Debug, Clone)]
pub struct PipelineStatsSnapshot {
    pub requests_sent: u64,
    pub responses_received: u64,
    pub batches_flushed: u64,
    pub timeouts: u64,
}
impl PipelineStatsSnapshot {
    /// Requests sent but not yet answered at snapshot time (saturating,
    /// so a racy snapshot can never underflow).
    pub fn in_flight(&self) -> u64 {
        self.requests_sent.saturating_sub(self.responses_received)
    }

    /// Fraction of sent requests that got a response; reports 1.0 for an
    /// idle pipeline rather than dividing by zero.
    pub fn success_rate(&self) -> f64 {
        match self.requests_sent {
            0 => 1.0,
            sent => self.responses_received as f64 / sent as f64,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder setters must land on the corresponding config fields.
    #[test]
    fn test_pipeline_config_builder() {
        let built = PipelineConfig::builder()
            .max_in_flight(200)
            .batch_linger_ms(10)
            .max_batch_size(128)
            .request_timeout(Duration::from_secs(60))
            .build();
        assert_eq!(built.max_in_flight, 200);
        // 10 ms expressed in microseconds.
        assert_eq!(built.batch_linger_us, 10_000);
        assert_eq!(built.max_batch_size, 128);
        assert_eq!(built.request_timeout, Duration::from_secs(60));
    }

    /// Throughput preset: deep pipeline, long linger, big batches.
    #[test]
    fn test_high_throughput_config() {
        let preset = PipelineConfig::high_throughput();
        assert_eq!(preset.max_in_flight, 1000);
        assert_eq!(preset.batch_linger_us, 5000);
        assert_eq!(preset.max_batch_size, 256);
    }

    /// Latency preset: shallow pipeline, no lingering, singleton batches.
    #[test]
    fn test_low_latency_config() {
        let preset = PipelineConfig::low_latency();
        assert_eq!(preset.max_in_flight, 32);
        assert_eq!(preset.batch_linger_us, 0);
        assert_eq!(preset.max_batch_size, 1);
    }

    /// 100 sent / 95 answered -> 5 in flight, 95% success.
    #[test]
    fn test_stats_snapshot() {
        let snap = PipelineStatsSnapshot {
            requests_sent: 100,
            responses_received: 95,
            batches_flushed: 10,
            timeouts: 5,
        };
        assert_eq!(snap.in_flight(), 5);
        assert!((snap.success_rate() - 0.95).abs() < 0.001);
    }

    /// An idle pipeline reports zero in-flight and a perfect success rate.
    #[test]
    fn test_stats_snapshot_empty() {
        let snap = PipelineStatsSnapshot {
            requests_sent: 0,
            responses_received: 0,
            batches_flushed: 0,
            timeouts: 0,
        };
        assert_eq!(snap.in_flight(), 0);
        assert!((snap.success_rate() - 1.0).abs() < 0.001);
    }
}