// pg-wired 0.4.0 - Docs.rs

//! Async split sender/receiver connection.
//! Inspired by hsqlx's PgWire.Async architecture.
//!
//! A single TCP connection is shared by many concurrent handler tasks.
//! The writer task coalesces messages from multiple requests into one write().
//! The reader task parses responses and dispatches them to waiting handlers via FIFO.

use std::collections::VecDeque;
use std::sync::Arc;

use bytes::BytesMut;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::sync::{mpsc, oneshot, Mutex};

use crate::connection::WireConn;
use crate::error::PgWireError;
use crate::protocol::backend;
use crate::protocol::frontend;
use crate::protocol::types::{BackendMsg, FormatCode, FrontendMsg, RawRow};

// ---------------------------------------------------------------------------
// Request types
// ---------------------------------------------------------------------------

/// A request to execute on the connection. Internal plumbing between the
/// public `submit` / `submit_batch` API and the writer task.
pub(crate) struct PipelineRequest {
    /// Already-encoded frontend messages that make up this request.
    pub(crate) messages: BytesMut,
    /// How the response messages for this request should be collected.
    pub(crate) collector: ResponseCollector,
    /// One-shot channel on which the outcome is reported to the submitter.
    /// Fire-and-forget callers simply drop the receiving half.
    pub(crate) response_tx: oneshot::Sender<Result<PipelineResponse, PgWireError>>,
}

/// How to collect response messages for a request.
///
/// Supplied by the caller together with the encoded messages (see
/// `AsyncConn::submit`) and carried alongside the request until its
/// response has been assembled.
// NOTE(review): the reason for `allow(dead_code)` is not visible here —
// presumably some variants are only constructed by downstream crates or
// tests; confirm and narrow the allow if possible.
#[allow(dead_code)]
#[non_exhaustive]
pub enum ResponseCollector {
    /// Collect DataRows until ReadyForQuery (for SELECT queries).
    Rows,
    /// Just drain until ReadyForQuery (for setup commands like BEGIN, SET ROLE).
    Drain,
    /// Stream rows one at a time via channels. Sends header first, then individual rows.
    Stream {
        /// One-shot channel for the row description (sent once before any rows).
        header_tx: oneshot::Sender<Result<StreamHeader, PgWireError>>,
        /// Bounded channel for individual rows; closed on completion or error.
        row_tx: mpsc::Sender<Result<StreamedRow, PgWireError>>,
    },
    /// COPY IN: after receiving CopyInResponse, send the provided data then CopyDone.
    ///
    /// The `copy_in` / `copy_in_stream` helpers in this module pass an empty
    /// `data` and instead pre-encode the CopyData / CopyDone frames into the
    /// request's message buffer.
    CopyIn {
        /// The data to send after CopyInResponse.
        data: Vec<u8>,
    },
    /// COPY OUT: collect CopyData messages until CopyDone.
    CopyOut,
}

/// Response from a pipeline request.
///
/// Besides ordinary queries, the COPY helpers in this module also receive
/// the `Rows` variant: `copy_out` concatenates the cells of `rows` as raw
/// COPY data, and `copy_in` / `copy_in_stream` read the copied-row count
/// from `command_tag`.
#[non_exhaustive]
pub enum PipelineResponse {
    /// A query that produced a row set (`SELECT`, `RETURNING`, etc.).
    Rows {
        /// Column metadata from RowDescription (empty if no RowDescription received).
        fields: Vec<crate::protocol::types::FieldDescription>,
        /// Row data.
        rows: Vec<RawRow>,
        /// CommandComplete tag (e.g. "SELECT 3", "INSERT 0 1").
        command_tag: String,
    },
    /// A statement that produced no row set (e.g., `BEGIN`, `SET ROLE`,
    /// non-RETURNING DML).
    Done,
}

/// Metadata sent at the start of a streaming response.
///
/// Delivered once through the `header_tx` half of
/// [`ResponseCollector::Stream`] and handed back to the caller by
/// `AsyncConn::submit_stream` together with the row receiver.
#[derive(Debug, Clone)]
pub struct StreamHeader {
    /// Column descriptions (name, OID, format) for the streamed result set.
    pub fields: Vec<crate::protocol::types::FieldDescription>,
}

/// A single streamed row.
///
/// Plain alias of [`RawRow`]; it is the item type carried (wrapped in a
/// `Result`) by the row channel of [`ResponseCollector::Stream`].
pub type StreamedRow = RawRow;

// ---------------------------------------------------------------------------
// Async connection
// ---------------------------------------------------------------------------

/// A shared async connection that multiplexes requests from many tasks.
///
/// Constructing one (`AsyncConn::new`) spawns a writer task and a reader
/// task; callers talk to them through the request channel held here.
pub struct AsyncConn {
    /// Sending half of the bounded request queue consumed by the writer task.
    request_tx: mpsc::Sender<PipelineRequest>,
    /// Prepared-statement cache: SQL text -> (statement name, counter value
    /// used to pick the oldest entry on eviction).
    stmt_cache: std::sync::Mutex<std::collections::HashMap<String, (String, u64)>>,
    /// Monotonic counter used to mint unique statement names (`s0`, `s1`, ...).
    stmt_counter: std::sync::atomic::AtomicU64,
    /// Liveness flag; set to `false` when either background task exits.
    alive: Arc<std::sync::atomic::AtomicBool>,
    /// Backend process ID taken from the underlying connection; used when
    /// building cancel tokens.
    backend_pid: i32,
    /// Backend secret key taken from the underlying connection; used when
    /// building cancel tokens.
    backend_secret: i32,
    /// Peer address rendered as a string; empty if it could not be
    /// determined when the connection was wrapped.
    addr: String,
    /// Channel for async notifications received during query execution.
    /// Notifications are forwarded here rather than being discarded; ones
    /// that cannot be delivered are counted in `dropped_notifications`.
    // NOTE(review): the reader task is given its own clone of this sender,
    // so this copy is presumably kept only so the channel stays open —
    // confirm before removing the `allow`.
    #[allow(dead_code)]
    notification_tx: mpsc::Sender<crate::protocol::types::BackendMsg>,
    /// Receiving half of the notification channel. Handed out at most once
    /// via `take_notification_receiver`, after which this holds `None`.
    notification_rx: std::sync::Mutex<Option<mpsc::Receiver<crate::protocol::types::BackendMsg>>>,
    /// True if any operation since the last `take_state_mutated()` may have
    /// left the session in a non-default state (open transaction, SET
    /// without LOCAL, advisory lock, temp table, prepared cursor, etc.).
    ///
    /// Set explicitly by callers issuing such operations
    /// (`mark_state_mutated`), and automatically by the reader task whenever
    /// ReadyForQuery reports a non-idle transaction status. Callers that
    /// only run self-contained Bind/Execute/Sync queries leave this `false`,
    /// allowing pools to skip an expensive DISCARD ALL on return.
    state_mutated: Arc<std::sync::atomic::AtomicBool>,
    /// True if a caller has declared the connection unusable (e.g., a
    /// transaction was dropped without commit/rollback, leaving the session
    /// in an unknown state). The reader/writer tasks may still be running, so
    /// `is_alive()` is true, but pools should treat the connection as broken
    /// and destroy it on return rather than reusing it.
    broken: Arc<std::sync::atomic::AtomicBool>,
    /// Cumulative count of asynchronous notifications dropped because the
    /// notification channel was full or no application code was draining it.
    /// Surfaced via [`AsyncConn::dropped_notifications`] so callers can detect
    /// missed `LISTEN` events.
    dropped_notifications: Arc<std::sync::atomic::AtomicU64>,
}

impl std::fmt::Debug for AsyncConn {
    /// Render a compact diagnostic view: peer address, backend PID and the
    /// current liveness flag. Caches, channels and the backend secret are
    /// intentionally left out.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("AsyncConn");
        out.field("addr", &self.addr);
        out.field("backend_pid", &self.backend_pid);
        out.field("alive", &self.is_alive());
        out.finish()
    }
}

impl AsyncConn {
    /// Report whether the connection is still alive, i.e. neither the
    /// writer nor the reader task has exited yet.
    pub fn is_alive(&self) -> bool {
        use std::sync::atomic::Ordering;
        self.alive.load(Ordering::Relaxed)
    }

    /// Process ID of the server backend serving this connection.
    pub fn backend_pid(&self) -> i32 {
        self.backend_pid
    }

    /// Address of the server this connection talks to.
    pub fn addr(&self) -> &str {
        self.addr.as_str()
    }

    /// Build a cancel token (address + backend PID + backend secret) for
    /// the session running on this connection.
    pub fn cancel_token(&self) -> crate::cancel::CancelToken {
        let Self {
            addr,
            backend_pid,
            backend_secret,
            ..
        } = self;
        crate::cancel::CancelToken::new(addr.clone(), *backend_pid, *backend_secret)
    }

    /// Record that session state has been mutated since the last reset.
    ///
    /// Pools consult `take_state_mutated()` when a connection is returned
    /// to decide whether `DISCARD ALL` is needed. Call this before
    /// submitting anything that leaves state behind: `BEGIN`, `SET`
    /// without `LOCAL`, advisory locks, temp tables, and so on.
    pub fn mark_state_mutated(&self) {
        use std::sync::atomic::Ordering;
        self.state_mutated.store(true, Ordering::Release);
    }

    /// Atomically fetch the state-mutated flag and reset it to `false`.
    /// A return value of `true` tells the caller to issue a reset.
    pub fn take_state_mutated(&self) -> bool {
        use std::sync::atomic::Ordering;
        self.state_mutated.swap(false, Ordering::AcqRel)
    }

    /// Peek at the state-mutated flag, leaving it untouched.
    pub fn is_state_mutated(&self) -> bool {
        use std::sync::atomic::Ordering;
        self.state_mutated.load(Ordering::Acquire)
    }

    /// Declare the connection broken.
    ///
    /// Use this when the session is in an indeterminate state (for
    /// example, a transaction was dropped without commit or rollback) even
    /// though the reader/writer tasks may still be running. Pool
    /// integrations check [`AsyncConn::is_broken`] on return and destroy
    /// the connection instead of putting it back in the idle set.
    pub fn mark_broken(&self) {
        use std::sync::atomic::Ordering;
        self.broken.store(true, Ordering::Release);
    }

    /// Whether a caller has flagged this connection via
    /// [`AsyncConn::mark_broken`]. This is independent of
    /// [`AsyncConn::is_alive`], which only tracks whether the background
    /// tasks are still running.
    pub fn is_broken(&self) -> bool {
        use std::sync::atomic::Ordering;
        self.broken.load(Ordering::Acquire)
    }

    /// Test-only hook: clear the `alive` flag while leaving the background
    /// tasks running.
    ///
    /// pg-wired's own tests and downstream integration tests (e.g.
    /// resolute) use it to reach the dead-connection branch of
    /// [`AsyncConn::enqueue_rollback`] (and anything else gated on
    /// `is_alive`) without depending on real task-exit timing. The `__`
    /// prefix and `#[doc(hidden)]` signal that this is not stable API and
    /// must not be used in production code.
    #[doc(hidden)]
    pub fn __force_mark_dead_for_test(&self) {
        use std::sync::atomic::Ordering;
        self.alive.store(false, Ordering::Release);
    }

    /// Queue a fire-and-forget `ROLLBACK` simple-query without awaiting,
    /// so it can be called from a synchronous `Drop`.
    ///
    /// Returns `true` when the request was placed on the writer queue.
    /// Returns `false` when the connection is not alive or the queue was
    /// full/closed; the caller should then fall back to
    /// [`AsyncConn::mark_broken`] so the pool discards the connection.
    ///
    /// PostgreSQL accepts `ROLLBACK` from any in-transaction state —
    /// including the aborted state (`25P02`) left behind by a failed query —
    /// so this reliably brings the session back to idle. The response is
    /// drained and thrown away. Because the writer queue preserves order,
    /// any later request (e.g., the pool's `DISCARD ALL` reset) runs against
    /// a clean connection.
    pub fn enqueue_rollback(&self) -> bool {
        self.is_alive() && try_enqueue_rollback(&self.request_tx)
    }
}

/// Worker behind [`AsyncConn::enqueue_rollback`]: builds a `ROLLBACK`
/// simple-query request and attempts a non-blocking push onto the writer's
/// request channel, returning whether the push succeeded.
///
/// Kept as a free function so the channel-full and channel-closed outcomes
/// can be unit tested without constructing a real `AsyncConn`.
fn try_enqueue_rollback(request_tx: &mpsc::Sender<PipelineRequest>) -> bool {
    let mut messages = BytesMut::with_capacity(16);
    frontend::encode_message(&FrontendMsg::Query(b"ROLLBACK"), &mut messages);

    // Nobody waits for the outcome: the receiving half is dropped when this
    // function returns.
    let (response_tx, _discarded_rx) = oneshot::channel();
    let request = PipelineRequest {
        messages,
        collector: ResponseCollector::Drain,
        response_tx,
    };
    request_tx.try_send(request).is_ok()
}

/// Bookkeeping for one in-flight request: what is left of a
/// `PipelineRequest` once its bytes are no longer needed.
///
/// Entries live in the `VecDeque` shared between the writer and reader
/// tasks (created in `AsyncConn::new`).
// NOTE(review): presumably the writer pushes an entry per request and the
// reader pops them in FIFO order as responses arrive — confirm against
// `writer_task` / `reader_task`.
struct PendingResponse {
    /// How the response for this request must be collected.
    collector: ResponseCollector,
    /// Where the collected response (or error) is delivered.
    response_tx: oneshot::Sender<Result<PipelineResponse, PgWireError>>,
}

impl AsyncConn {
    /// Wrap an established [`WireConn`] in an `AsyncConn`.
    ///
    /// The underlying stream is split into read and write halves and two
    /// background tasks are spawned: a writer fed by the request channel
    /// and a reader that receives the notification sender and the shared
    /// state flags. Whichever task exits first clears the `alive` flag.
    pub fn new(conn: WireConn) -> Self {
        use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

        let backend_pid = conn.pid;
        let backend_secret = conn.secret;
        // The peer address has to be captured before `into_stream` consumes
        // the connection; an unavailable address becomes an empty string.
        let addr = match conn.stream.peer_addr() {
            Ok(peer) => peer.to_string(),
            Err(_) => String::new(),
        };

        let (notification_tx, notification_rx) = mpsc::channel(4096);
        let (request_tx, request_rx) = mpsc::channel::<PipelineRequest>(256);
        let pending: Arc<Mutex<VecDeque<PendingResponse>>> = Arc::new(Mutex::new(VecDeque::new()));
        let pending_notify = Arc::new(tokio::sync::Notify::new());
        let alive = Arc::new(AtomicBool::new(true));
        let state_mutated = Arc::new(AtomicBool::new(false));
        let broken = Arc::new(AtomicBool::new(false));
        let dropped_notifications = Arc::new(AtomicU64::new(0));

        let (stream_read, stream_write) = tokio::io::split(conn.into_stream());

        // Writer task: flags the connection as dead once it returns.
        let writer_pending = Arc::clone(&pending);
        let writer_notify = Arc::clone(&pending_notify);
        let writer_alive = Arc::clone(&alive);
        tokio::spawn(async move {
            writer_task(request_rx, stream_write, writer_pending, writer_notify).await;
            writer_alive.store(false, Ordering::Relaxed);
            tracing::warn!("pg-wired writer task exited");
        });

        // Reader task: likewise flags the connection as dead on exit.
        let reader_pending = Arc::clone(&pending);
        let reader_notify = Arc::clone(&pending_notify);
        let reader_alive = Arc::clone(&alive);
        let reader_state = Arc::clone(&state_mutated);
        let reader_ntf_tx = notification_tx.clone();
        let reader_dropped = Arc::clone(&dropped_notifications);
        tokio::spawn(async move {
            reader_task(
                stream_read,
                reader_pending,
                reader_notify,
                reader_ntf_tx,
                reader_state,
                reader_dropped,
            )
            .await;
            reader_alive.store(false, Ordering::Relaxed);
            tracing::warn!("pg-wired reader task exited");
        });

        Self {
            request_tx,
            stmt_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
            stmt_counter: AtomicU64::new(0),
            alive,
            backend_pid,
            backend_secret,
            addr,
            notification_tx,
            notification_rx: std::sync::Mutex::new(Some(notification_rx)),
            state_mutated,
            broken,
            dropped_notifications,
        }
    }

    /// Total number of `NotificationResponse` messages discarded by this
    /// connection since it was created.
    ///
    /// A notification is dropped when (a) the application has not yet
    /// called [`AsyncConn::take_notification_receiver`], or (b) the
    /// receiver is drained too slowly and the bounded channel is full.
    /// Compare two successive readings to detect missed `LISTEN` events.
    pub fn dropped_notifications(&self) -> u64 {
        use std::sync::atomic::Ordering;
        self.dropped_notifications.load(Ordering::Relaxed)
    }

    /// Take the notification receiver. Call once to get a channel that
    /// receives `NotificationResponse` messages that arrive during queries.
    ///
    /// Returns `Some(receiver)` on the first call and `None` on every later
    /// call, because the receiver is moved out of the connection.
    ///
    /// A poisoned lock is recovered rather than treated as "already taken",
    /// matching how the statement-cache lock is handled elsewhere in this
    /// type; the guarded value is a plain `Option`, so it cannot be left in
    /// an inconsistent state.
    pub fn take_notification_receiver(
        &self,
    ) -> Option<mpsc::Receiver<crate::protocol::types::BackendMsg>> {
        let mut slot = self
            .notification_rx
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        slot.take()
    }

    /// Resolve the prepared-statement name to use for `sql`.
    ///
    /// * **Hit** — the cached name is returned with `needs_parse = false`;
    ///   the caller sends only `Bind/Execute/Sync`.
    /// * **Miss** — a new unique name is minted from the connection's
    ///   statement counter and returned with `needs_parse = true`. The name
    ///   is *not* inserted into the cache here. The caller MUST put a
    ///   `Parse` for it in the same atomic submit as `Bind/Execute/Sync`
    ///   (so the Parse runs inside whatever role-switched transaction the
    ///   caller has framed, e.g. `BEGIN; SET LOCAL ROLE …; …`) and call
    ///   [`Self::cache_statement`] only once that Parse has succeeded on
    ///   the wire.
    ///
    /// # Why the name is published only after success
    ///
    /// An earlier design queued the Parse as a separate writer request and
    /// published the cache entry immediately, which stopped a concurrent
    /// caller from sending a Bind-only request ahead of the Parse. The
    /// price was that the Parse ran outside any transaction, under the
    /// connection's persistent role (e.g. PostgREST's `authenticator`).
    /// SQL touching objects reachable only after `SET LOCAL ROLE` then
    /// failed during Parse with `42501 permission denied`, and every later
    /// Bind for that name failed with `26000: prepared statement "sN" does
    /// not exist`. With publish-after-success the cache stays
    /// role-correct: each concurrent first-time caller pays for its own
    /// Parse, `cache_statement` applies first-publisher-wins, and the
    /// losing names become session-bounded orphans (bounded by the
    /// 256-entry LRU on this connection).
    pub fn lookup_or_alloc(&self, sql: &str, _param_oids: &[u32]) -> (Vec<u8>, bool) {
        use std::sync::atomic::Ordering;

        let cache = self
            .stmt_cache
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        match cache.get(sql) {
            Some((cached, _)) => (cached.as_bytes().to_vec(), false),
            None => {
                // The counter never repeats, so concurrent misses receive
                // distinct names. Nothing is stored for `sql` until the
                // caller reports a successful Parse via `cache_statement`.
                let id = self.stmt_counter.fetch_add(1, Ordering::Relaxed);
                (format!("s{id}").into_bytes(), true)
            }
        }
    }

    /// Record a successfully Parsed statement so later lookups for the same
    /// SQL can skip the Parse step.
    ///
    /// The high-level `exec_*` helpers (and any external user of
    /// [`Self::lookup_or_alloc`]) call this after the submit containing
    /// the `Parse` for `name` came back successfully. Forgetting to call
    /// it is harmless: the next lookup misses and Parses again.
    ///
    /// A `name` that is not valid UTF-8 is ignored.
    ///
    /// **First publisher wins.** If a concurrent miss has already
    /// published another name for this SQL, the existing entry is kept and
    /// the caller's name becomes a server-side orphan (cleaned up at
    /// session end; bounded by LRU eviction during the session).
    ///
    /// **Eviction.** Once the cache holds 256 entries, the entry with the
    /// smallest counter is removed and a fire-and-forget `Close + Sync` is
    /// queued to release the server-side prepared statement.
    pub fn cache_statement(&self, sql: &str, name: &[u8]) {
        use std::sync::atomic::Ordering;

        let name_str = match std::str::from_utf8(name) {
            Ok(text) => text,
            Err(_) => return,
        };
        // Names minted by `lookup_or_alloc` look like `s<N>`; anything else
        // is ranked by the current counter value instead.
        let parsed = name_str
            .strip_prefix('s')
            .and_then(|digits| digits.parse::<u64>().ok());
        let counter = match parsed {
            Some(n) => n,
            None => self.stmt_counter.load(Ordering::Relaxed),
        };

        let mut cache = self
            .stmt_cache
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        if cache.contains_key(sql) {
            return;
        }

        if cache.len() >= 256 {
            let victim = cache
                .iter()
                .min_by_key(|(_, (_, age))| *age)
                .map(|(key, (stmt, _))| (key.clone(), stmt.clone()));
            if let Some((victim_sql, victim_name)) = victim {
                cache.remove(&victim_sql);

                let mut close_buf = BytesMut::with_capacity(32);
                let close = FrontendMsg::Close {
                    kind: b'S',
                    name: victim_name.as_bytes(),
                };
                frontend::encode_message(&close, &mut close_buf);
                frontend::encode_message(&FrontendMsg::Sync, &mut close_buf);

                // Best effort: if the queue is full or closed the Close is
                // simply not sent.
                let (response_tx, _discarded_rx) = oneshot::channel();
                let request = PipelineRequest {
                    messages: close_buf,
                    collector: ResponseCollector::Drain,
                    response_tx,
                };
                let _ = self.request_tx.try_send(request);
            }
        }

        cache.insert(sql.to_owned(), (name_str.to_owned(), counter));
    }

    /// Execute COPY FROM STDIN: sends the COPY command, then data in chunks, then CopyDone.
    /// Returns the number of rows copied (from CommandComplete tag).
    ///
    /// Data is sent in chunks of up to 1MB to avoid buffering the entire payload
    /// in a single BytesMut. For small payloads (< 1MB), this is a single write.
    pub async fn copy_in(&self, copy_sql: &str, data: &[u8]) -> Result<u64, PgWireError> {
        use crate::protocol::types::FrontendMsg;
        const CHUNK_SIZE: usize = 1024 * 1024; // 1MB chunks

        // Build the message buffer: Query + chunked CopyData + CopyDone.
        let mut buf = BytesMut::with_capacity(copy_sql.len() + data.len().min(CHUNK_SIZE) + 64);
        frontend::encode_message(&FrontendMsg::Query(copy_sql.as_bytes()), &mut buf);

        // Send data in chunks to avoid a single huge allocation.
        for chunk in data.chunks(CHUNK_SIZE) {
            frontend::encode_message(&FrontendMsg::CopyData(chunk), &mut buf);
        }
        // Empty data is valid (0 rows copied).
        frontend::encode_message(&FrontendMsg::CopyDone, &mut buf);

        let resp = self
            .submit(buf, ResponseCollector::CopyIn { data: Vec::new() })
            .await?;
        match resp {
            PipelineResponse::Rows { command_tag, .. } => Ok(parse_copy_count(&command_tag)),
            PipelineResponse::Done => Ok(0),
        }
    }

    /// Execute COPY FROM STDIN, pulling the payload from an async reader.
    ///
    /// The reader is drained to EOF through a 1 MiB scratch buffer; each
    /// read becomes one CopyData frame appended to the request buffer.
    /// The request (Query + CopyData frames + CopyDone) is submitted only
    /// after the reader is exhausted, so the complete encoded payload is
    /// held in memory at that point.
    ///
    /// Returns the row count parsed from the command tag, or `0` when the
    /// response carries no tag. Read errors from `reader` and errors from
    /// the submit are propagated.
    ///
    /// ```no_run
    /// # async fn _doctest() -> Result<(), Box<dyn std::error::Error>> {
    /// # let conn: pg_wired::AsyncConn = unimplemented!();
    /// use tokio::fs::File;
    /// let file = File::open("data.csv").await?;
    /// let _count = conn.copy_in_stream("COPY users FROM STDIN WITH (FORMAT csv)", file).await?;
    /// # Ok(()) }
    /// ```
    pub async fn copy_in_stream<R: tokio::io::AsyncRead + Unpin>(
        &self,
        copy_sql: &str,
        mut reader: R,
    ) -> Result<u64, PgWireError> {
        const CHUNK_SIZE: usize = 1024 * 1024; // 1MB chunks

        // Start with the COPY command itself.
        let mut request = BytesMut::with_capacity(copy_sql.len() + 16);
        frontend::encode_message(&FrontendMsg::Query(copy_sql.as_bytes()), &mut request);

        // Frame everything the reader yields; a zero-length read means EOF.
        let mut scratch = vec![0u8; CHUNK_SIZE];
        loop {
            match reader.read(&mut scratch).await? {
                0 => break,
                filled => frontend::encode_message(
                    &FrontendMsg::CopyData(&scratch[..filled]),
                    &mut request,
                ),
            }
        }
        frontend::encode_message(&FrontendMsg::CopyDone, &mut request);

        let collector = ResponseCollector::CopyIn { data: Vec::new() };
        match self.submit(request, collector).await? {
            PipelineResponse::Rows { command_tag, .. } => Ok(parse_copy_count(&command_tag)),
            PipelineResponse::Done => Ok(0),
        }
    }

    /// Execute COPY TO STDOUT: sends the COPY command, collects all CopyData.
    pub async fn copy_out(&self, copy_sql: &str) -> Result<Vec<u8>, PgWireError> {
        use crate::protocol::types::FrontendMsg;
        let mut buf = BytesMut::new();
        frontend::encode_message(&FrontendMsg::Query(copy_sql.as_bytes()), &mut buf);

        let resp = self.submit(buf, ResponseCollector::CopyOut).await?;
        match resp {
            PipelineResponse::Rows { rows, .. } => {
                // For CopyOut, we reuse the Rows variant but each `RawRow` carries
                // one cell which is the raw COPY data chunk (see `collect_copy_out`).
                let mut result = Vec::new();
                for row in rows {
                    for data in row.iter().flatten() {
                        result.extend_from_slice(data);
                    }
                }
                Ok(result)
            }
            PipelineResponse::Done => Ok(Vec::new()),
        }
    }

    /// Drop the cache entry for `sql` so the next use Parses it again.
    /// Intended for invalidating prepared statements after schema changes.
    /// Removing an entry that is not cached is a no-op.
    pub fn invalidate_statement(&self, sql: &str) {
        self.stmt_cache
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .remove(sql);
    }

    /// Empty the statement cache. Must be called after `DISCARD ALL`,
    /// which destroys every server-side prepared statement and would
    /// otherwise leave the cache pointing at names that no longer exist.
    pub fn clear_statement_cache(&self) {
        self.stmt_cache
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .clear();
    }

    /// Execute a pipelined transaction with automatic statement caching.
    ///
    /// On a successful Parse the new statement name is published in the
    /// cache via [`Self::cache_statement`]. If a cached statement turns
    /// out to be invalid (PG error 26000 or 0A000), the cache entry is
    /// evicted and the transaction is retried once. This handles schema
    /// changes invalidating cached plans after their initial Parse.
    ///
    /// The retry consults the cache again and honours the result. Usually
    /// it is a miss and the retry Parses under a fresh name. If a
    /// concurrent task has already re-published the statement, the retry
    /// reuses that name without a second Parse, since re-Parsing an
    /// existing name would fail on the server.
    ///
    /// # Errors
    ///
    /// Returns the error from the first attempt unless it was a
    /// stale-statement error on a cached name, in which case the outcome
    /// of the single retry is returned.
    pub async fn exec_transaction(
        &self,
        setup_sql: &str,
        query_sql: &str,
        params: &[Option<&[u8]>],
        param_oids: &[u32],
    ) -> Result<Vec<RawRow>, PgWireError> {
        let (stmt_name, needs_parse) = self.lookup_or_alloc(query_sql, param_oids);
        match self
            .pipeline_transaction(
                setup_sql,
                query_sql,
                params,
                param_oids,
                &stmt_name,
                needs_parse,
            )
            .await
        {
            Ok(rows) => {
                if needs_parse {
                    self.cache_statement(query_sql, &stmt_name);
                }
                Ok(rows)
            }
            Err(PgWireError::Pg(ref pg_err))
                if !needs_parse && is_stale_statement_error(pg_err) =>
            {
                tracing::debug!(
                    sql = query_sql,
                    "prepared statement invalidated — re-parsing in transaction"
                );
                self.invalidate_statement(query_sql);
                // Another task sharing this connection may have re-Parsed
                // and re-published the statement between the eviction above
                // and this lookup. Forcing a Parse on that name would fail,
                // so follow whatever the lookup reports.
                let (stmt_name, needs_parse) = self.lookup_or_alloc(query_sql, param_oids);
                let result = self
                    .pipeline_transaction(
                        setup_sql,
                        query_sql,
                        params,
                        param_oids,
                        &stmt_name,
                        needs_parse,
                    )
                    .await;
                if needs_parse && result.is_ok() {
                    self.cache_statement(query_sql, &stmt_name);
                }
                result
            }
            Err(e) => Err(e),
        }
    }

    /// Execute a parameterized query with automatic statement caching.
    ///
    /// If a cached statement is invalidated by a schema change (PG error
    /// 26000 or 0A000), the cache entry is evicted and the query is retried
    /// once. After a successful Parse the statement name is published via
    /// [`Self::cache_statement`].
    ///
    /// The retry consults the cache again and honours the result. Usually
    /// it is a miss and the retry Parses under a fresh name. If a
    /// concurrent task has already re-published the statement, the retry
    /// reuses that name without a second Parse, since re-Parsing an
    /// existing name would fail on the server.
    ///
    /// # Errors
    ///
    /// Returns the error from the first attempt unless it was a
    /// stale-statement error on a cached name, in which case the outcome
    /// of the single retry is returned.
    pub async fn exec_query(
        &self,
        sql: &str,
        params: &[Option<&[u8]>],
        param_oids: &[u32],
    ) -> Result<Vec<RawRow>, PgWireError> {
        let (stmt_name, needs_parse) = self.lookup_or_alloc(sql, param_oids);
        match self
            .query(sql, params, param_oids, &stmt_name, needs_parse)
            .await
        {
            Ok(rows) => {
                if needs_parse {
                    self.cache_statement(sql, &stmt_name);
                }
                Ok(rows)
            }
            Err(PgWireError::Pg(ref pg_err))
                if !needs_parse && is_stale_statement_error(pg_err) =>
            {
                tracing::debug!(sql = sql, "prepared statement invalidated — re-parsing");
                self.invalidate_statement(sql);
                // Another task sharing this connection may have re-Parsed
                // and re-published the statement between the eviction above
                // and this lookup. Forcing a Parse on that name would fail,
                // so follow whatever the lookup reports.
                let (stmt_name, needs_parse) = self.lookup_or_alloc(sql, param_oids);
                let result = self
                    .query(sql, params, param_oids, &stmt_name, needs_parse)
                    .await;
                if needs_parse && result.is_ok() {
                    self.cache_statement(sql, &stmt_name);
                }
                result
            }
            Err(e) => Err(e),
        }
    }

    /// Upper bound (five minutes) on how long a caller waits for the
    /// reader task to deliver a response. Keeps callers from hanging
    /// forever if the reader/writer task dies mid-request.
    const REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5 * 60);

    /// Queue one request on the connection and wait for its response.
    ///
    /// Fails with `ConnectionClosed` when the request queue is closed, when
    /// the response channel is dropped without an answer, or when no
    /// response arrives within `REQUEST_TIMEOUT` (5 minutes) — the latter
    /// guards against hanging forever if the reader/writer task dies.
    /// Otherwise returns whatever result was delivered for the request.
    pub async fn submit(
        &self,
        messages: BytesMut,
        collector: ResponseCollector,
    ) -> Result<PipelineResponse, PgWireError> {
        let (response_tx, response_rx) = oneshot::channel();
        let request = PipelineRequest {
            messages,
            collector,
            response_tx,
        };
        if self.request_tx.send(request).await.is_err() {
            return Err(PgWireError::ConnectionClosed);
        }

        let Ok(received) = tokio::time::timeout(Self::REQUEST_TIMEOUT, response_rx).await else {
            tracing::error!(
                "request timed out after {:?} — reader/writer task may be dead",
                Self::REQUEST_TIMEOUT
            );
            return Err(PgWireError::ConnectionClosed);
        };
        match received {
            Ok(outcome) => outcome,
            // The sender was dropped before a response was produced.
            Err(_) => Err(PgWireError::ConnectionClosed),
        }
    }

    /// Queue several requests in FIFO order, then collect their responses.
    ///
    /// Every request is enqueued before the first response is awaited, so
    /// the writer task sees them together and can coalesce them into a
    /// single write() syscall; the server pipelines the N responses
    /// back-to-back, giving one network round-trip for all N queries.
    ///
    /// The returned vector holds one `Result<PipelineResponse,
    /// PgWireError>` per input item, in input order. The outer `Result`
    /// fails only when queueing fails (channel closed); each inner
    /// `Result` carries that query's own outcome. Responses are awaited
    /// one after another, each with its own `REQUEST_TIMEOUT` window; a
    /// timeout or dropped response is reported as `ConnectionClosed` for
    /// that item only.
    pub async fn submit_batch(
        &self,
        items: Vec<(BytesMut, ResponseCollector)>,
    ) -> Result<Vec<Result<PipelineResponse, PgWireError>>, PgWireError> {
        // Phase 1: enqueue everything, remembering the response channels.
        let mut receivers = Vec::with_capacity(items.len());
        for (messages, collector) in items {
            let (response_tx, response_rx) = oneshot::channel();
            let request = PipelineRequest {
                messages,
                collector,
                response_tx,
            };
            if self.request_tx.send(request).await.is_err() {
                return Err(PgWireError::ConnectionClosed);
            }
            receivers.push(response_rx);
        }

        // Phase 2: gather the outcomes in submission order.
        let mut results = Vec::with_capacity(receivers.len());
        for rx in receivers {
            let outcome = match tokio::time::timeout(Self::REQUEST_TIMEOUT, rx).await {
                Ok(Ok(delivered)) => delivered,
                Ok(Err(_)) => Err(PgWireError::ConnectionClosed),
                Err(_) => {
                    tracing::error!(
                        "submit_batch request timed out after {:?}",
                        Self::REQUEST_TIMEOUT
                    );
                    Err(PgWireError::ConnectionClosed)
                }
            };
            results.push(outcome);
        }
        Ok(results)
    }

    /// Send a Terminate message to the server and wait for the submit to
    /// resolve. Afterwards the connection is unusable and further calls
    /// fail with `ConnectionClosed`.
    ///
    /// Idempotent: on a connection that is already not alive this is a
    /// no-op returning `Ok`. `ConnectionClosed` and broken-pipe I/O errors
    /// are treated as a successful close; any other error is returned.
    pub async fn close(&self) -> Result<(), PgWireError> {
        if !self.is_alive() {
            return Ok(());
        }

        let mut terminate = BytesMut::with_capacity(5);
        frontend::encode_message(&FrontendMsg::Terminate, &mut terminate);

        // Terminate goes through the writer queue so it stays ordered after
        // any requests already in flight. The server answers with nothing
        // and closes the socket, so `ConnectionClosed` from the drain
        // collector is the expected way for this submit to end.
        let Err(err) = self.submit(terminate, ResponseCollector::Drain).await else {
            return Ok(());
        };
        match err {
            PgWireError::ConnectionClosed => Ok(()),
            PgWireError::Io(ref io) if io.kind() == std::io::ErrorKind::BrokenPipe => Ok(()),
            other => Err(other),
        }
    }

    /// Submit a streaming request. Returns the column header and an mpsc receiver
    /// that yields rows one at a time.
    ///
    /// `messages` holds the already-encoded frontend bytes for the request.
    /// `row_buffer` is the capacity of the bounded row channel. A value of `0`
    /// is treated as `1`, because `tokio::sync::mpsc::channel` panics when
    /// asked for a zero-capacity channel.
    ///
    /// # Errors
    ///
    /// Returns `PgWireError::ConnectionClosed` if the request cannot be queued
    /// or if the header sender is dropped before sending anything. Otherwise
    /// any error delivered on the header channel is propagated.
    pub async fn submit_stream(
        &self,
        messages: BytesMut,
        row_buffer: usize,
    ) -> Result<
        (
            StreamHeader,
            mpsc::Receiver<Result<StreamedRow, PgWireError>>,
        ),
        PgWireError,
    > {
        let (header_tx, header_rx) = oneshot::channel();
        // Clamp to at least one slot: a zero capacity would panic inside tokio.
        let (row_tx, row_rx) = mpsc::channel(row_buffer.max(1));
        // The stream collector reports through the header/row channels; the
        // generic response receiver is unused and dropped when this returns.
        let (response_tx, _response_rx) = oneshot::channel();
        let req = PipelineRequest {
            messages,
            collector: ResponseCollector::Stream { header_tx, row_tx },
            response_tx,
        };
        self.request_tx
            .send(req)
            .await
            .map_err(|_| PgWireError::ConnectionClosed)?;
        // Outer error: header sender dropped. Inner error: failure reported
        // before any header could be produced.
        let header = header_rx
            .await
            .map_err(|_| PgWireError::ConnectionClosed)??;
        Ok((header, row_rx))
    }

    /// Execute a pipelined transaction:
    /// setup (simple query) + data query (extended protocol) + COMMIT (simple query)
    /// All coalesced into one TCP write. Binary-safe parameterized data query.
    ///
    /// - `setup_sql`: sent as a simple `Query` message before the data query.
    /// - `query_sql`: the data query; only sent in a `Parse` message when
    ///   `needs_parse` is true.
    /// - `params` / `param_oids`: bind values and their type OIDs; all
    ///   parameters and results are bound in text format.
    /// - `stmt_name`: prepared-statement name used for `Parse` and `Bind`.
    ///
    /// Returns the rows produced by the data query, or an empty vector if the
    /// data request completed with `PipelineResponse::Done`.
    ///
    /// # Errors
    ///
    /// Returns `PgWireError::ConnectionClosed` if any request cannot be queued
    /// or a response channel is dropped, and otherwise the first error
    /// reported for the setup, data, or commit request (checked in that order).
    ///
    /// NOTE(review): the three requests are queued with three separate
    /// `send().await` calls, so a request from another task could presumably
    /// be queued between them — confirm whether callers serialize access.
    pub async fn pipeline_transaction(
        &self,
        setup_sql: &str,
        query_sql: &str,
        params: &[Option<&[u8]>],
        param_oids: &[u32],
        stmt_name: &[u8],
        needs_parse: bool,
    ) -> Result<Vec<RawRow>, PgWireError> {
        let mut buf = BytesMut::with_capacity(1024);

        // 1. Simple query for setup (BEGIN + SET ROLE + set_config).
        frontend::encode_message(&FrontendMsg::Query(setup_sql.as_bytes()), &mut buf);

        // Split the setup bytes off into their own buffer; they are submitted
        // below with a Drain collector because the response data is not needed.
        let setup_msgs = buf.split();

        // 2. Extended query for data.
        // The vector always has at least one element so the slice taken for
        // Bind below is valid; with zero params that slice is empty.
        let text_fmts: Vec<FormatCode> = vec![FormatCode::Text; params.len().max(1)];
        let result_fmts = [FormatCode::Text];

        if needs_parse {
            frontend::encode_message(
                &FrontendMsg::Parse {
                    name: stmt_name,
                    sql: query_sql.as_bytes(),
                    param_oids,
                },
                &mut buf,
            );
        }

        // Bind to the unnamed portal.
        frontend::encode_message(
            &FrontendMsg::Bind {
                portal: b"",
                statement: stmt_name,
                param_formats: &text_fmts[..params.len()],
                params,
                result_formats: &result_fmts,
            },
            &mut buf,
        );

        frontend::encode_message(
            &FrontendMsg::Execute {
                portal: b"",
                max_rows: 0,
            },
            &mut buf,
        );

        frontend::encode_message(&FrontendMsg::Sync, &mut buf);

        let data_msgs = buf.split();

        // 3. Simple query for COMMIT — in its own buffer so each request
        // carries exactly the bytes that produce its ReadyForQuery response.
        let mut commit_buf = BytesMut::with_capacity(32);
        frontend::encode_message(&FrontendMsg::Query(b"COMMIT"), &mut commit_buf);

        // Submit all three as separate requests with different collectors.
        // They'll be coalesced by the writer into one write() syscall.
        let (setup_tx, setup_rx) = oneshot::channel();
        let (data_tx, data_rx) = oneshot::channel();
        let (commit_tx, commit_rx) = oneshot::channel();

        // Send all three requests to the writer channel.
        // The writer drains the channel and writes them all at once.
        self.request_tx
            .send(PipelineRequest {
                messages: setup_msgs,
                collector: ResponseCollector::Drain,
                response_tx: setup_tx,
            })
            .await
            .map_err(|_| PgWireError::ConnectionClosed)?;

        self.request_tx
            .send(PipelineRequest {
                messages: data_msgs,
                collector: ResponseCollector::Rows,
                response_tx: data_tx,
            })
            .await
            .map_err(|_| PgWireError::ConnectionClosed)?;

        self.request_tx
            .send(PipelineRequest {
                messages: commit_buf,
                collector: ResponseCollector::Drain,
                response_tx: commit_tx,
            })
            .await
            .map_err(|_| PgWireError::ConnectionClosed)?;

        // Wait for all responses, in submission order. Each `??` unwraps first
        // the channel result and then the per-request result.
        setup_rx
            .await
            .map_err(|_| PgWireError::ConnectionClosed)??;

        let data_resp = data_rx.await.map_err(|_| PgWireError::ConnectionClosed)??;

        commit_rx
            .await
            .map_err(|_| PgWireError::ConnectionClosed)??;

        match data_resp {
            PipelineResponse::Rows { rows, .. } => Ok(rows),
            PipelineResponse::Done => Ok(Vec::new()),
        }
    }

    /// Execute a simple parameterized query (no transaction).
    ///
    /// Delegates to `query_with_formats` with empty parameter and result
    /// format lists, which that method treats as all-text.
    ///
    /// - `sql`: statement text, sent in a `Parse` message only when
    ///   `needs_parse` is true.
    /// - `params` / `param_oids`: bind values and their type OIDs.
    /// - `stmt_name`: prepared-statement name to parse into and bind against.
    ///
    /// Returns the collected rows, or whatever error `query_with_formats`
    /// reports.
    pub async fn query(
        &self,
        sql: &str,
        params: &[Option<&[u8]>],
        param_oids: &[u32],
        stmt_name: &[u8],
        needs_parse: bool,
    ) -> Result<Vec<RawRow>, PgWireError> {
        self.query_with_formats(sql, params, param_oids, &[], &[], stmt_name, needs_parse)
            .await
    }

    /// Run a parameterized query through the extended protocol, with caller
    /// control over parameter and result format codes (text = 0, binary = 1).
    ///
    /// `param_formats` follows the PostgreSQL wire protocol conventions:
    /// - empty: every parameter is text
    /// - one entry: that code is used for every parameter
    /// - N entries (== params.len()): one code per parameter
    ///
    /// `result_formats` follows the same conventions for output columns
    /// (empty → all text; one entry → all columns; otherwise per column).
    ///
    /// The request is `Parse` (only when `needs_parse` is true), `Bind` on the
    /// unnamed portal, `Execute` with no row limit, then `Sync`. Returns the
    /// collected rows, or an empty vector if the response carried none.
    #[allow(clippy::too_many_arguments)]
    pub async fn query_with_formats(
        &self,
        sql: &str,
        params: &[Option<&[u8]>],
        param_oids: &[u32],
        param_formats: &[FormatCode],
        result_formats: &[FormatCode],
        stmt_name: &[u8],
        needs_parse: bool,
    ) -> Result<Vec<RawRow>, PgWireError> {
        // Resolve the format lists for Bind; an empty list means "all text".
        let implicit_text_params: Vec<FormatCode>;
        let bind_param_formats: &[FormatCode] = match param_formats {
            [] => {
                implicit_text_params = vec![FormatCode::Text; params.len().max(1)];
                &implicit_text_params[..params.len()]
            }
            explicit => explicit,
        };
        let implicit_text_results = [FormatCode::Text];
        let bind_result_formats: &[FormatCode] = match result_formats {
            [] => &implicit_text_results[..],
            explicit => explicit,
        };

        let mut wire = BytesMut::with_capacity(512);

        if needs_parse {
            let parse = FrontendMsg::Parse {
                name: stmt_name,
                sql: sql.as_bytes(),
                param_oids,
            };
            frontend::encode_message(&parse, &mut wire);
        }

        let bind = FrontendMsg::Bind {
            portal: b"",
            statement: stmt_name,
            param_formats: bind_param_formats,
            params,
            result_formats: bind_result_formats,
        };
        frontend::encode_message(&bind, &mut wire);

        let execute = FrontendMsg::Execute {
            portal: b"",
            max_rows: 0,
        };
        frontend::encode_message(&execute, &mut wire);

        frontend::encode_message(&FrontendMsg::Sync, &mut wire);

        match self.submit(wire, ResponseCollector::Rows).await? {
            PipelineResponse::Rows { rows, .. } => Ok(rows),
            PipelineResponse::Done => Ok(Vec::new()),
        }
    }

    /// Variant of `exec_query` with per-param and per-result format codes.
    /// See `query_with_formats` for format code semantics.
    ///
    /// The statement name and whether a `Parse` is needed come from
    /// `lookup_or_alloc`. On success after a parse, the statement is recorded
    /// via `cache_statement`.
    ///
    /// If a query that skipped `Parse` fails with a PostgreSQL error that
    /// `is_stale_statement_error` recognizes, the statement is invalidated and
    /// the query is retried exactly once with a forced `Parse`. Every other
    /// error is returned unchanged.
    pub async fn exec_query_with_formats(
        &self,
        sql: &str,
        params: &[Option<&[u8]>],
        param_oids: &[u32],
        param_formats: &[FormatCode],
        result_formats: &[FormatCode],
    ) -> Result<Vec<RawRow>, PgWireError> {
        let (stmt_name, needs_parse) = self.lookup_or_alloc(sql, param_oids);
        match self
            .query_with_formats(
                sql,
                params,
                param_oids,
                param_formats,
                result_formats,
                &stmt_name,
                needs_parse,
            )
            .await
        {
            Ok(rows) => {
                // Only record the statement once the server has accepted the Parse.
                if needs_parse {
                    self.cache_statement(sql, &stmt_name);
                }
                Ok(rows)
            }
            // Retry only when we relied on a previously prepared statement
            // (no Parse was sent) and the error indicates it is stale.
            Err(PgWireError::Pg(ref pg_err))
                if !needs_parse && is_stale_statement_error(pg_err) =>
            {
                tracing::debug!(sql = sql, "prepared statement invalidated — re-parsing");
                self.invalidate_statement(sql);
                // The returned needs_parse flag is ignored: the retry below
                // always sends Parse.
                let (stmt_name, _) = self.lookup_or_alloc(sql, param_oids);
                let result = self
                    .query_with_formats(
                        sql,
                        params,
                        param_oids,
                        param_formats,
                        result_formats,
                        &stmt_name,
                        true,
                    )
                    .await;
                if result.is_ok() {
                    self.cache_statement(sql, &stmt_name);
                }
                result
            }
            Err(e) => Err(e),
        }
    }
}

// ---------------------------------------------------------------------------
// Writer task
// ---------------------------------------------------------------------------

/// Writer half of the connection: receives requests from `rx`, coalesces
/// everything currently queued into one buffer, writes it to `stream`, and
/// then hands the matching `PendingResponse` entries to the reader through
/// `pending`, signalling `pending_notify`.
///
/// The task returns when the request channel is closed or when a write or
/// flush fails. On a write failure every request in the current batch gets an
/// `Io` error of kind `BrokenPipe` carrying the original error text. In both
/// exit paths, entries still in `pending` are failed with `ConnectionClosed`.
async fn writer_task(
    mut rx: mpsc::Receiver<PipelineRequest>,
    mut stream: tokio::io::WriteHalf<crate::tls::MaybeTlsStream>,
    pending: Arc<Mutex<VecDeque<PendingResponse>>>,
    pending_notify: Arc<tokio::sync::Notify>,
) {
    // Reused across iterations; cleared before each batch.
    let mut write_buf = BytesMut::with_capacity(8192);

    loop {
        // Wait for the first request.
        let first = match rx.recv().await {
            Some(req) => req,
            None => {
                // Channel closed — drain any pending responses with ConnectionClosed.
                drain_pending_on_exit(&pending).await;
                return;
            }
        };

        // Drain any additional queued requests (batch coalescing).
        write_buf.clear();
        write_buf.extend_from_slice(&first.messages);

        // `batch` keeps the response bookkeeping in the same order as the
        // bytes appended to `write_buf`.
        let mut batch: Vec<PendingResponse> = vec![PendingResponse {
            collector: first.collector,
            response_tx: first.response_tx,
        }];

        // Non-blocking drain of all queued requests.
        while let Ok(req) = rx.try_recv() {
            write_buf.extend_from_slice(&req.messages);
            batch.push(PendingResponse {
                collector: req.collector,
                response_tx: req.response_tx,
            });
        }

        // ONE write() syscall for all coalesced messages.
        // Write BEFORE enqueuing pending responses — if the write fails,
        // we send errors to callers instead of leaving them hanging.
        let write_result = stream.write_all(&write_buf).await;
        // A flush failure after a successful write is treated like a write failure.
        let write_err = match write_result {
            Ok(_) => stream.flush().await.err(),
            Err(e) => Some(e),
        };

        if let Some(e) = write_err {
            tracing::error!("Writer error: {e}");
            // One error value per caller is rebuilt from the message text,
            // always with kind BrokenPipe.
            let msg = e.to_string();
            for p in batch {
                let _ = p.response_tx.send(Err(PgWireError::Io(std::io::Error::new(
                    std::io::ErrorKind::BrokenPipe,
                    msg.clone(),
                ))));
            }
            // Drain any already-pending responses so the reader doesn't hang.
            drain_pending_on_exit(&pending).await;
            return;
        }

        // Write succeeded — enqueue pending responses for the reader.
        // The lock is scoped so it is released before notifying.
        {
            let mut pq = pending.lock().await;
            for p in batch {
                pq.push_back(p);
            }
        }
        // Wake the reader task to process the newly enqueued responses.
        pending_notify.notify_one();
    }
}

/// Fail every queued `PendingResponse` with `ConnectionClosed`.
///
/// Called when the writer task is about to exit, so waiting callers are told
/// immediately instead of sitting until their request timeout expires. Send
/// failures (caller already gone) are ignored.
async fn drain_pending_on_exit(pending: &Arc<Mutex<VecDeque<PendingResponse>>>) {
    let mut queue = pending.lock().await;
    for orphan in queue.drain(..) {
        let _ = orphan.response_tx.send(Err(PgWireError::ConnectionClosed));
    }
}

// ---------------------------------------------------------------------------
// Reader task
// ---------------------------------------------------------------------------

/// Reader half of the connection: takes `PendingResponse` entries from the
/// front of `pending` (FIFO), reads backend messages from `stream` according
/// to each entry's collector, and sends the outcome on the entry's
/// `response_tx`.
///
/// When the queue is empty the task waits on `pending_notify`.
///
/// NOTE(review): the outer loop has no `break` or `return`, so this function
/// never completes on its own — presumably the owner aborts or drops the
/// task; confirm against the spawn site.
async fn reader_task(
    mut stream: tokio::io::ReadHalf<crate::tls::MaybeTlsStream>,
    pending: Arc<Mutex<VecDeque<PendingResponse>>>,
    pending_notify: Arc<tokio::sync::Notify>,
    notification_tx: mpsc::Sender<BackendMsg>,
    state_mutated: Arc<std::sync::atomic::AtomicBool>,
    dropped_notifications: Arc<std::sync::atomic::AtomicU64>,
) {
    // Bytes read from the socket but not yet parsed carry over between
    // requests through this shared buffer.
    let mut recv_buf = BytesMut::with_capacity(32 * 1024);

    loop {
        // Wait for a pending response to become available.
        let pr = loop {
            // Inner scope so the queue lock is released before waiting.
            {
                let mut pq = pending.lock().await;
                if let Some(pr) = pq.pop_front() {
                    break pr;
                }
            }
            // No pending — wait for the writer to signal.
            pending_notify.notified().await;
        };

        // Collect the response based on the collector type.
        let result = match pr.collector {
            ResponseCollector::Rows => {
                collect_rows(
                    &mut stream,
                    &mut recv_buf,
                    &notification_tx,
                    &state_mutated,
                    &dropped_notifications,
                )
                .await
            }
            ResponseCollector::Drain => {
                drain_until_ready(&mut stream, &mut recv_buf, Some(&state_mutated))
                    .await
                    .map(|_| PipelineResponse::Done)
            }
            ResponseCollector::Stream { header_tx, row_tx } => {
                // Streaming reports headers, rows and errors through its own
                // channels, so the generic response is always `Done`.
                stream_rows(
                    &mut stream,
                    &mut recv_buf,
                    header_tx,
                    row_tx,
                    &notification_tx,
                    &state_mutated,
                    &dropped_notifications,
                )
                .await;
                Ok(PipelineResponse::Done)
            }
            // The collector's payload is not used on the read side.
            ResponseCollector::CopyIn { .. } => {
                collect_copy_in_response(&mut stream, &mut recv_buf, &state_mutated).await
            }
            ResponseCollector::CopyOut => {
                collect_copy_out(&mut stream, &mut recv_buf, &state_mutated).await
            }
        };

        // Send the response back to the caller.
        // A send error means the caller stopped waiting; ignore it.
        let _ = pr.response_tx.send(result);
    }
}

/// Return the next complete backend message, reading more bytes from
/// `stream` into `buf` as needed.
///
/// Parse failures are reported as `PgWireError::Protocol`, and read errors are
/// propagated. If the stream reaches EOF, one final parse of the buffered
/// bytes is attempted before giving up with `ConnectionClosed`.
async fn read_msg(
    stream: &mut tokio::io::ReadHalf<crate::tls::MaybeTlsStream>,
    buf: &mut BytesMut,
) -> Result<BackendMsg, PgWireError> {
    loop {
        let parsed = backend::parse_message(buf).map_err(PgWireError::Protocol)?;
        if let Some(msg) = parsed {
            return Ok(msg);
        }

        // Not enough buffered data for a whole message: pull more bytes.
        if stream.read_buf(buf).await? != 0 {
            continue;
        }

        // EOF — give the buffer one last chance to yield a message that
        // was already fully received before the peer closed.
        return backend::parse_message(buf)
            .map_err(PgWireError::Protocol)?
            .ok_or(PgWireError::ConnectionClosed);
    }
}

/// Record that the session is no longer in a clean state.
///
/// A ReadyForQuery status of `I` (idle) leaves the flag untouched. Any other
/// status byte — such as `T` (in transaction) or `E` (failed transaction) —
/// sets `state_mutated` to `true`, marking the connection as needing
/// DISCARD ALL. The flag is only ever set here, never cleared.
fn note_rfq_status(status: u8, state_mutated: &std::sync::atomic::AtomicBool) {
    use std::sync::atomic::Ordering;

    let idle = status == b'I';
    if idle {
        return;
    }
    state_mutated.store(true, Ordering::Release);
}

/// Gather one request's response up to ReadyForQuery: the row description,
/// every data row, and the command tag.
///
/// Notifications seen along the way are forwarded on `notification_tx`
/// without blocking; if that channel cannot accept one, it is counted in
/// `dropped_notifications` and dropped. On an ErrorResponse the remaining
/// messages are drained up to ReadyForQuery and the PostgreSQL error is
/// returned. All other message types are ignored.
async fn collect_rows(
    stream: &mut tokio::io::ReadHalf<crate::tls::MaybeTlsStream>,
    buf: &mut BytesMut,
    notification_tx: &mpsc::Sender<BackendMsg>,
    state_mutated: &std::sync::atomic::AtomicBool,
    dropped_notifications: &std::sync::atomic::AtomicU64,
) -> Result<PipelineResponse, PgWireError> {
    let mut command_tag = String::new();
    let mut fields = Vec::new();
    let mut rows = Vec::new();

    loop {
        match read_msg(stream, buf).await? {
            BackendMsg::RowDescription { fields: described } => fields = described,
            BackendMsg::DataRow(row) => rows.push(row),
            BackendMsg::CommandComplete { tag } => command_tag = tag,
            BackendMsg::ErrorResponse { fields: pg_error } => {
                // Resynchronize on ReadyForQuery before reporting the error.
                drain_until_ready(stream, buf, Some(state_mutated)).await?;
                return Err(PgWireError::Pg(pg_error));
            }
            BackendMsg::ReadyForQuery { status } => {
                note_rfq_status(status, state_mutated);
                break;
            }
            notification @ BackendMsg::NotificationResponse { .. } => {
                // Forward without blocking the reader; count what we lose.
                #[allow(clippy::collapsible_match)]
                if notification_tx.try_send(notification).is_err() {
                    dropped_notifications.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                    tracing::warn!("notification channel full, dropping notification");
                }
            }
            // ParseComplete, BindComplete, NoData, NoticeResponse,
            // EmptyQueryResponse and anything else carry nothing we collect.
            _ => {}
        }
    }

    Ok(PipelineResponse::Rows {
        fields,
        rows,
        command_tag,
    })
}

/// Read and discard backend messages until ReadyForQuery arrives.
///
/// When `state_mutated` is provided, the ReadyForQuery status byte is passed
/// to `note_rfq_status`. Any ErrorResponse seen on the way is logged at warn
/// level and otherwise ignored; only read failures produce an `Err`.
async fn drain_until_ready(
    stream: &mut tokio::io::ReadHalf<crate::tls::MaybeTlsStream>,
    buf: &mut BytesMut,
    state_mutated: Option<&std::sync::atomic::AtomicBool>,
) -> Result<(), PgWireError> {
    loop {
        match read_msg(stream, buf).await? {
            BackendMsg::ReadyForQuery { status } => {
                if let Some(flag) = state_mutated {
                    note_rfq_status(status, flag);
                }
                return Ok(());
            }
            BackendMsg::ErrorResponse { fields } => {
                tracing::warn!("Error in drain: {}: {}", fields.code, fields.message);
            }
            _ => {}
        }
    }
}

/// Stream rows one at a time, sending header first, then individual rows.
///
/// The header is sent exactly once on `header_tx`: before the first DataRow,
/// or at CommandComplete / ReadyForQuery if no row arrived. Errors go to
/// `header_tx` if the header has not been sent yet, otherwise to `row_tx`.
///
/// If the row receiver is dropped, or the server reports an error, the rest
/// of the response is drained up to ReadyForQuery before returning.
/// Notifications are forwarded on `notification_tx` without blocking and
/// counted in `dropped_notifications` when that channel cannot accept them.
async fn stream_rows(
    stream: &mut tokio::io::ReadHalf<crate::tls::MaybeTlsStream>,
    buf: &mut BytesMut,
    header_tx: oneshot::Sender<Result<StreamHeader, PgWireError>>,
    row_tx: mpsc::Sender<Result<StreamedRow, PgWireError>>,
    notification_tx: &mpsc::Sender<BackendMsg>,
    state_mutated: &std::sync::atomic::AtomicBool,
    dropped_notifications: &std::sync::atomic::AtomicU64,
) {
    // `Some` until the header (or an error in its place) has been sent.
    let mut header_tx = Some(header_tx);
    let mut fields = Vec::new();
    loop {
        let msg = match read_msg(stream, buf).await {
            Ok(msg) => msg,
            Err(e) => {
                // Read failure: report it on whichever channel the caller is
                // currently waiting on, then stop.
                if let Some(htx) = header_tx.take() {
                    let _ = htx.send(Err(e));
                } else {
                    let _ = row_tx.send(Err(e)).await;
                }
                return;
            }
        };
        match msg {
            BackendMsg::RowDescription { fields: f } => {
                fields = f;
            }
            BackendMsg::DataRow(row) => {
                // First row: publish the header before any row is delivered.
                if let Some(htx) = header_tx.take() {
                    let _ = htx.send(Ok(StreamHeader {
                        fields: fields.clone(),
                    }));
                }
                // A send error means the receiver was dropped; discard the
                // rest of the response so the connection stays in sync.
                if row_tx.send(Ok(row)).await.is_err() {
                    let _ = drain_until_ready(stream, buf, Some(state_mutated)).await;
                    return;
                }
            }
            BackendMsg::CommandComplete { .. } => {
                // No rows were produced: still deliver the header.
                if let Some(htx) = header_tx.take() {
                    let _ = htx.send(Ok(StreamHeader {
                        fields: std::mem::take(&mut fields),
                    }));
                }
            }
            BackendMsg::ReadyForQuery { status } => {
                note_rfq_status(status, state_mutated);
                // Last chance to deliver the header if nothing sent it earlier.
                if let Some(htx) = header_tx.take() {
                    let _ = htx.send(Ok(StreamHeader {
                        fields: std::mem::take(&mut fields),
                    }));
                }
                // Returning drops `row_tx`, which closes the row channel.
                return;
            }
            BackendMsg::ErrorResponse { fields: err } => {
                if let Some(htx) = header_tx.take() {
                    let _ = htx.send(Err(PgWireError::Pg(err)));
                } else {
                    let _ = row_tx.send(Err(PgWireError::Pg(err))).await;
                }
                // Consume the rest of the response up to ReadyForQuery.
                let _ = drain_until_ready(stream, buf, Some(state_mutated)).await;
                return;
            }
            msg @ BackendMsg::NotificationResponse { .. } => {
                #[allow(clippy::collapsible_match)]
                if notification_tx.try_send(msg).is_err() {
                    dropped_notifications.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                    tracing::warn!("notification channel full, dropping notification");
                }
            }
            // Protocol bookkeeping messages carry nothing to stream.
            BackendMsg::ParseComplete
            | BackendMsg::BindComplete
            | BackendMsg::NoData
            | BackendMsg::PortalSuspended
            | BackendMsg::NoticeResponse { .. }
            | BackendMsg::EmptyQueryResponse => {}
            _ => {}
        }
    }
}

/// Read the server's reply to a COPY IN request.
///
/// CopyInResponse and any other unlisted message are skipped; the command
/// tag from CommandComplete is kept, and ReadyForQuery ends the exchange.
/// The CopyData + CopyDone bytes were already part of the write, so nothing
/// is sent from here. The result is a `Rows` response with no fields and no
/// rows, carrying only the command tag. An ErrorResponse is returned as a
/// PostgreSQL error after draining to ReadyForQuery.
async fn collect_copy_in_response(
    stream: &mut tokio::io::ReadHalf<crate::tls::MaybeTlsStream>,
    buf: &mut BytesMut,
    state_mutated: &std::sync::atomic::AtomicBool,
) -> Result<PipelineResponse, PgWireError> {
    let mut tag_seen = String::new();

    loop {
        match read_msg(stream, buf).await? {
            BackendMsg::CommandComplete { tag } => tag_seen = tag,
            BackendMsg::ErrorResponse { fields: pg_error } => {
                drain_until_ready(stream, buf, Some(state_mutated)).await?;
                return Err(PgWireError::Pg(pg_error));
            }
            BackendMsg::ReadyForQuery { status } => {
                note_rfq_status(status, state_mutated);
                break;
            }
            // CopyInResponse and everything else: nothing to record.
            _ => {}
        }
    }

    Ok(PipelineResponse::Rows {
        fields: Vec::new(),
        rows: Vec::new(),
        command_tag: tag_seen,
    })
}

/// Gather a COPY OUT response: CopyOutResponse → CopyData* → CopyDone →
/// CommandComplete → ReadyForQuery.
///
/// Each CopyData payload becomes one `RawRow` built with
/// `RawRow::from_full_body`. The result is a `Rows` response with no fields,
/// the collected chunks as rows, and the command tag. An ErrorResponse is
/// returned as a PostgreSQL error after draining to ReadyForQuery.
async fn collect_copy_out(
    stream: &mut tokio::io::ReadHalf<crate::tls::MaybeTlsStream>,
    buf: &mut BytesMut,
    state_mutated: &std::sync::atomic::AtomicBool,
) -> Result<PipelineResponse, PgWireError> {
    let mut tag_seen = String::new();
    let mut chunks: Vec<RawRow> = Vec::new();

    loop {
        match read_msg(stream, buf).await? {
            BackendMsg::CopyData { data } => {
                chunks.push(RawRow::from_full_body(bytes::Bytes::from(data)));
            }
            BackendMsg::CommandComplete { tag } => tag_seen = tag,
            BackendMsg::ErrorResponse { fields: pg_error } => {
                drain_until_ready(stream, buf, Some(state_mutated)).await?;
                return Err(PgWireError::Pg(pg_error));
            }
            BackendMsg::ReadyForQuery { status } => {
                note_rfq_status(status, state_mutated);
                break;
            }
            // CopyOutResponse, CopyDone and everything else: nothing to record.
            _ => {}
        }
    }

    Ok(PipelineResponse::Rows {
        fields: Vec::new(),
        rows: chunks,
        command_tag: tag_seen,
    })
}

/// Report whether a PostgreSQL error means a prepared statement has gone
/// stale and should be re-parsed.
///
/// Matches SQLSTATE 26000 (invalid_sql_statement_name) and 0A000
/// (feature_not_supported — raised when a cached plan's result type changes).
fn is_stale_statement_error(err: &crate::protocol::types::PgError) -> bool {
    const STALE_SQLSTATES: [&str; 2] = ["26000", "0A000"];
    STALE_SQLSTATES.contains(&err.code.as_str())
}

/// Extract the row count from a COPY command tag of the form `"COPY 123"`.
///
/// Returns 0 when the tag does not start with `"COPY "` or when the remainder
/// does not parse as a `u64`.
fn parse_copy_count(tag: &str) -> u64 {
    match tag.strip_prefix("COPY ") {
        Some(count) => count.parse::<u64>().unwrap_or(0),
        None => 0,
    }
}

// Extension to WireConn to extract the underlying stream.
impl WireConn {
    /// Consume the connection and return its `stream` field by value.
    pub(crate) fn into_stream(self) -> crate::tls::MaybeTlsStream {
        self.stream
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // These tests exercise `try_enqueue_rollback` against a bare request
    // channel; no connection or writer task is involved.

    /// Channel-full branch: when the request channel has no spare capacity,
    /// `try_enqueue_rollback` returns `false` instead of blocking.
    #[tokio::test]
    async fn try_enqueue_rollback_returns_false_when_channel_full() {
        // `_rx` is kept alive but never read, so queued requests accumulate.
        let (tx, _rx) = mpsc::channel::<PipelineRequest>(2);
        // Fill the channel by reusing the same helper. capacity=2 plus the
        // single buffered slot tokio reserves means we may need to push
        // until try_send fails; loop until we observe the false return.
        let mut filled = false;
        for _ in 0..16 {
            if !try_enqueue_rollback(&tx) {
                filled = true;
                break;
            }
        }
        assert!(
            filled,
            "expected try_enqueue_rollback to eventually return false on a full channel"
        );
        assert!(
            !try_enqueue_rollback(&tx),
            "subsequent calls on a full channel must keep returning false"
        );
    }

    /// Channel-closed branch: dropping the receiver makes `try_send` fail
    /// with `Closed`, which `try_enqueue_rollback` reports as `false`.
    #[tokio::test]
    async fn try_enqueue_rollback_returns_false_when_channel_closed() {
        let (tx, rx) = mpsc::channel::<PipelineRequest>(8);
        drop(rx);
        assert!(
            !try_enqueue_rollback(&tx),
            "try_enqueue_rollback must return false when the receiver has been dropped"
        );
    }

    /// Happy path: with a live receiver and free capacity, the helper
    /// reports success and the receiver observes a queued request whose
    /// payload starts with the simple-query opcode `'Q'`.
    #[tokio::test]
    async fn try_enqueue_rollback_returns_true_and_enqueues_query() {
        let (tx, mut rx) = mpsc::channel::<PipelineRequest>(2);
        assert!(try_enqueue_rollback(&tx));
        let req = rx.recv().await.expect("request should be received");
        assert_eq!(
            req.messages.first().copied(),
            Some(b'Q'),
            "queued request should be a simple Query message"
        );
        // Body should mention ROLLBACK (text follows length prefix and is
        // null-terminated; just substring-search to keep the test simple).
        // The window length of 8 equals the byte length of "ROLLBACK".
        assert!(
            req.messages.windows(8).any(|w| w == b"ROLLBACK"),
            "queued request should contain the ROLLBACK statement text"
        );
    }
}