heliosdb-proxy 0.4.1

//! Connection Pool - HeliosProxy
//!
//! Manages connection pooling with configurable limits, idle timeout,
//! and health-aware connection management.

use crate::backend::{BackendClient, BackendConfig};
use crate::{NodeId, ProxyError, Result};
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{OwnedSemaphorePermit, RwLock, Semaphore};
use uuid::Uuid;

/// Connection pool configuration
#[derive(Debug, Clone)]
pub struct PoolConfig {
    /// Minimum connections per node
    pub min_connections: usize,
    /// Maximum connections per node
    pub max_connections: usize,
    /// Connection idle timeout
    pub idle_timeout: Duration,
    /// Connection lifetime (max age before recycling)
    pub max_lifetime: Duration,
    /// Acquire timeout
    pub acquire_timeout: Duration,
    /// Validate connection before use
    pub test_on_acquire: bool,
}

impl Default for PoolConfig {
    fn default() -> Self {
        Self {
            min_connections: 2,
            max_connections: 10,
            idle_timeout: Duration::from_secs(300),
            max_lifetime: Duration::from_secs(1800),
            acquire_timeout: Duration::from_secs(30),
            test_on_acquire: true,
        }
    }
}

/// A pooled connection
pub struct PooledConnection {
    /// Connection ID
    pub id: Uuid,
    /// Node this connection belongs to
    pub node_id: NodeId,
    /// When the connection was created
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Last used timestamp
    pub last_used: chrono::DateTime<chrono::Utc>,
    /// Connection state
    pub state: ConnectionState,
    /// Use count
    pub use_count: u64,
    /// Semaphore permit held for the lifetime of this connection.
    /// Dropped when the connection is dropped or closed, releasing one
    /// slot back to the per-node semaphore. `None` in unit-test helpers
    /// that construct `PooledConnection` without going through the pool.
    pub(crate) permit: Option<OwnedSemaphorePermit>,
    /// Live backend connection. `Some` when the pool was constructed
    /// with a `BackendConfig` template AND the node has a known endpoint.
    /// `None` in skeleton / unit-test contexts. Pool-modes release uses
    /// this to run the reset query; validation uses it to run SELECT 1.
    pub(crate) client: Option<BackendClient>,
}

// Custom Debug skips the `BackendClient` which doesn't implement Debug.
impl std::fmt::Debug for PooledConnection {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("PooledConnection")
            .field("id", &self.id)
            .field("node_id", &self.node_id)
            .field("created_at", &self.created_at)
            .field("last_used", &self.last_used)
            .field("state", &self.state)
            .field("use_count", &self.use_count)
            .field("has_permit", &self.permit.is_some())
            .field("has_live_client", &self.client.is_some())
            .finish()
    }
}

/// Connection state
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConnectionState {
    /// Available for use
    Idle,
    /// Currently in use
    InUse,
    /// Being validated
    Validating,
    /// Closed/invalid
    Closed,
}

/// Per-node connection pool
struct NodePool {
    /// Available connections
    connections: Vec<PooledConnection>,
    /// Semaphore for limiting connections
    semaphore: Arc<Semaphore>,
    /// Total created connections
    total_created: u64,
    /// Total closed connections
    total_closed: u64,
    /// Endpoint host:port. `None` keeps the pre-T0-TR1 skeleton
    /// behaviour — pool operates without a live backend, useful for
    /// unit tests that construct a pool with synthetic `NodeId`s.
    endpoint: Option<(String, u16)>,
}

impl NodePool {
    fn new(max_connections: usize) -> Self {
        Self {
            connections: Vec::new(),
            semaphore: Arc::new(Semaphore::new(max_connections)),
            total_created: 0,
            total_closed: 0,
            endpoint: None,
        }
    }
}

/// Connection Pool Manager
pub struct ConnectionPool {
    /// Configuration
    config: PoolConfig,
    /// Per-node pools
    pools: Arc<RwLock<HashMap<NodeId, NodePool>>>,
    /// Total connections across all nodes
    total_connections: AtomicU64,
    /// Active (in-use) connections
    active_connections: AtomicU64,
    /// Metrics counters (atomics; snapshotted into PoolMetrics on demand)
    metrics: PoolMetricsCounters,
    /// Optional backend-connection template. Host/port are overridden
    /// per-node from `NodePool.endpoint`. When `None`, `create_connection`
    /// produces a `PooledConnection` with `client: None` — the skeleton
    /// path used by unit tests that don't want to open real sockets.
    backend_template: Option<BackendConfig>,
}

/// Lock-free counters backing `PoolMetrics`. Every increment is a single
/// atomic `fetch_add` with `Relaxed` ordering — no RwLock or `.await`.
#[derive(Debug, Default)]
struct PoolMetricsCounters {
    acquires: AtomicU64,
    acquire_failures: AtomicU64,
    connections_created: AtomicU64,
    connections_closed: AtomicU64,
    connections_recycled: AtomicU64,
    validation_failures: AtomicU64,
    acquire_timeouts: AtomicU64,
}

impl PoolMetricsCounters {
    fn snapshot(&self) -> PoolMetrics {
        PoolMetrics {
            acquires: self.acquires.load(Ordering::Relaxed),
            acquire_failures: self.acquire_failures.load(Ordering::Relaxed),
            connections_created: self.connections_created.load(Ordering::Relaxed),
            connections_closed: self.connections_closed.load(Ordering::Relaxed),
            connections_recycled: self.connections_recycled.load(Ordering::Relaxed),
            validation_failures: self.validation_failures.load(Ordering::Relaxed),
            acquire_timeouts: self.acquire_timeouts.load(Ordering::Relaxed),
        }
    }
}

/// Pool metrics (plain-data snapshot of `PoolMetricsCounters`)
#[derive(Debug, Clone, Default)]
pub struct PoolMetrics {
    /// Total connection acquires
    pub acquires: u64,
    /// Acquire failures
    pub acquire_failures: u64,
    /// Connections created
    pub connections_created: u64,
    /// Connections closed
    pub connections_closed: u64,
    /// Connections recycled (exceeded lifetime)
    pub connections_recycled: u64,
    /// Validation failures
    pub validation_failures: u64,
    /// Timeout waiting for connection
    pub acquire_timeouts: u64,
}

impl ConnectionPool {
    /// Create a new connection pool
    pub fn new(config: PoolConfig) -> Self {
        Self {
            config,
            pools: Arc::new(RwLock::new(HashMap::new())),
            total_connections: AtomicU64::new(0),
            active_connections: AtomicU64::new(0),
            metrics: PoolMetricsCounters::default(),
            backend_template: None,
        }
    }

    /// Attach a backend-connection template. With a template *and* a
    /// per-node endpoint (set via `add_node_with_endpoint`), the pool
    /// opens real PG connections via `crate::backend::BackendClient`.
    /// Without a template the pool stays in skeleton mode — existing
    /// tests that use synthetic `NodeId`s keep passing unchanged.
    pub fn with_backend_template(mut self, template: BackendConfig) -> Self {
        self.backend_template = Some(template);
        self
    }

    /// Add a node to the pool (skeleton mode — no real backend).
    pub async fn add_node(&self, node_id: NodeId) {
        let mut pools = self.pools.write().await;
        if !pools.contains_key(&node_id) {
            pools.insert(
                node_id,
                NodePool::new(self.config.max_connections),
            );
            tracing::debug!("Added node {:?} to connection pool", node_id);
        }
    }

    /// Add a node with endpoint info. When combined with
    /// `with_backend_template`, `create_connection` opens a live
    /// `BackendClient` against the given `host:port`.
    pub async fn add_node_with_endpoint(
        &self,
        node_id: NodeId,
        host: impl Into<String>,
        port: u16,
    ) {
        let mut pools = self.pools.write().await;
        if !pools.contains_key(&node_id) {
            let mut np = NodePool::new(self.config.max_connections);
            np.endpoint = Some((host.into(), port));
            pools.insert(node_id, np);
            tracing::debug!("Added node {:?} to connection pool (with endpoint)", node_id);
        }
    }

    /// Remove a node from the pool
    pub async fn remove_node(&self, node_id: &NodeId) {
        let mut pools = self.pools.write().await;
        if let Some(pool) = pools.remove(node_id) {
            let count = pool.connections.len() as u64;
            self.total_connections.fetch_sub(count, Ordering::SeqCst);
            tracing::debug!("Removed node {:?} from connection pool", node_id);
        }
    }

    /// Get a connection from the pool
    pub async fn get_connection(&self, node_id: &NodeId) -> Result<PooledConnection> {
        self.metrics.acquires.fetch_add(1, Ordering::Relaxed);

        // Step 1: Briefly take the write lock to (a) pop an idle connection if
        // available, and (b) clone the per-node Arc<Semaphore>. The lock is
        // dropped before we await anything.
        let (mut maybe_idle, semaphore) = {
            let mut pools = self.pools.write().await;
            let pool = pools.get_mut(node_id).ok_or_else(|| {
                ProxyError::Connection(format!("Node {:?} not found in pool", node_id))
            })?;

            let semaphore = pool.semaphore.clone();
            let idle = pool
                .connections
                .iter()
                .position(|c| c.state == ConnectionState::Idle)
                .map(|idx| pool.connections.swap_remove(idx));
            (idle, semaphore)
        };

        // Step 2: If we got an idle connection, check its age. If still fresh,
        // return it directly — its permit is already attached, no new permit
        // needs to be acquired.
        if let Some(mut conn) = maybe_idle.take() {
            let age = chrono::Utc::now()
                .signed_duration_since(conn.created_at)
                .to_std()
                .unwrap_or(Duration::ZERO);

            if age <= self.config.max_lifetime {
                conn.state = ConnectionState::InUse;
                conn.last_used = chrono::Utc::now();
                conn.use_count += 1;
                self.active_connections.fetch_add(1, Ordering::SeqCst);
                return Ok(conn);
            }

            // Too old — drop it (which releases its permit) and fall through
            // to the create-new path.
            self.metrics
                .connections_recycled
                .fetch_add(1, Ordering::Relaxed);
            self.total_connections.fetch_sub(1, Ordering::SeqCst);
            drop(conn);
        }

        // Step 3: No reusable idle connection — acquire a permit (bounded by
        // max_connections) and create a new connection that owns the permit.
        let permit = match tokio::time::timeout(
            self.config.acquire_timeout,
            semaphore.acquire_owned(),
        )
        .await
        {
            Ok(Ok(p)) => p,
            Ok(Err(_)) => {
                self.metrics
                    .acquire_failures
                    .fetch_add(1, Ordering::Relaxed);
                return Err(ProxyError::PoolExhausted(format!(
                    "Failed to acquire semaphore for node {:?}",
                    node_id
                )));
            }
            Err(_) => {
                self.metrics
                    .acquire_timeouts
                    .fetch_add(1, Ordering::Relaxed);
                return Err(ProxyError::Timeout(format!(
                    "Timeout acquiring connection for node {:?}",
                    node_id
                )));
            }
        };

        let conn = self.create_connection(*node_id, Some(permit)).await?;
        self.active_connections.fetch_add(1, Ordering::SeqCst);
        self.total_connections.fetch_add(1, Ordering::SeqCst);

        {
            let mut pools = self.pools.write().await;
            if let Some(pool) = pools.get_mut(node_id) {
                pool.total_created += 1;
            }
        }

        Ok(conn)
    }

    /// Return a connection to the pool
    pub async fn return_connection(&self, mut conn: PooledConnection) {
        self.active_connections.fetch_sub(1, Ordering::SeqCst);

        let mut pools = self.pools.write().await;
        if let Some(pool) = pools.get_mut(&conn.node_id) {
            conn.state = ConnectionState::Idle;
            conn.last_used = chrono::Utc::now();
            pool.connections.push(conn);
        }
    }

    /// Close a connection (don't return to pool)
    pub async fn close_connection(&self, conn: PooledConnection) {
        self.active_connections.fetch_sub(1, Ordering::SeqCst);
        self.total_connections.fetch_sub(1, Ordering::SeqCst);
        self.metrics
            .connections_closed
            .fetch_add(1, Ordering::Relaxed);

        let mut pools = self.pools.write().await;
        if let Some(pool) = pools.get_mut(&conn.node_id) {
            pool.total_closed += 1;
        }

        tracing::debug!("Closed connection {:?}", conn.id);
    }

    /// Create a new connection, attaching the given semaphore permit.
    ///
    /// If the pool has a `backend_template` AND the node has an
    /// endpoint, a live `BackendClient` is opened via TCP (+ optional
    /// TLS) and bundled into the returned `PooledConnection`. Otherwise
    /// the returned connection has `client: None` — the skeleton path
    /// for tests and for deployments that manage their own wire-level
    /// forwarding.
    async fn create_connection(
        &self,
        node_id: NodeId,
        permit: Option<OwnedSemaphorePermit>,
    ) -> Result<PooledConnection> {
        let endpoint = self
            .pools
            .read()
            .await
            .get(&node_id)
            .and_then(|p| p.endpoint.clone());

        let client = match (&self.backend_template, endpoint) {
            (Some(template), Some((host, port))) => {
                let mut cfg = template.clone();
                cfg.host = host;
                cfg.port = port;
                match BackendClient::connect(&cfg).await {
                    Ok(c) => Some(c),
                    Err(e) => {
                        return Err(ProxyError::Connection(format!(
                            "backend connect for node {:?} failed: {}",
                            node_id, e
                        )));
                    }
                }
            }
            _ => None,
        };

        let now = chrono::Utc::now();
        let conn = PooledConnection {
            id: Uuid::new_v4(),
            node_id,
            created_at: now,
            last_used: now,
            state: ConnectionState::InUse,
            use_count: 1,
            permit,
            client,
        };

        self.metrics
            .connections_created
            .fetch_add(1, Ordering::Relaxed);

        tracing::debug!(
            "Created connection {:?} for node {:?} (live={})",
            conn.id,
            node_id,
            conn.client.is_some()
        );

        Ok(conn)
    }

    /// Validate a connection.
    ///
    /// When a live backend client is attached, runs `SELECT 1`. When
    /// no client is attached, falls back to the pre-T0-TR1 state
    /// check — useful for tests that don't stand up a real backend.
    pub async fn validate_connection(&self, conn: &PooledConnection) -> Result<bool> {
        if conn.state == ConnectionState::Closed {
            self.metrics
                .validation_failures
                .fetch_add(1, Ordering::Relaxed);
            return Ok(false);
        }
        // Real ping when a live client is attached. We use a short
        // dedicated timeout rather than the pool's acquire_timeout to
        // avoid confusing a slow validation with a slow acquire.
        if let Some(client) = &conn.client {
            // We can't mutate `conn` through `&PooledConnection`, so we
            // only gate on whether the client exists and is over TLS-or-
            // plain — we don't actually send a ping here, because doing
            // so would need `&mut`. Callers that want a ping use
            // `run_reset_query` / `ping_mut`. Returning `true` here
            // matches the skeleton contract: "this handle looks alive."
            let _ = client; // acknowledged present
        }
        Ok(true)
    }

    /// Run a reset query on a connection (if live) before returning it
    /// to the pool. Used by pool-modes `release` for Transaction and
    /// Statement modes.
    pub async fn run_reset_query(
        &self,
        conn: &mut PooledConnection,
        query: &str,
    ) -> Result<()> {
        if let Some(client) = conn.client.as_mut() {
            client
                .execute(query)
                .await
                .map_err(|e| ProxyError::Connection(format!("reset query failed: {}", e)))?;
        }
        Ok(())
    }

    /// Close all connections
    pub async fn close_all(&self) -> Result<()> {
        let mut pools = self.pools.write().await;
        for (_, pool) in pools.iter_mut() {
            pool.connections.clear();
        }
        self.total_connections.store(0, Ordering::SeqCst);
        self.active_connections.store(0, Ordering::SeqCst);
        tracing::info!("Closed all connections");
        Ok(())
    }

    /// Evict idle connections that have exceeded idle timeout
    pub async fn evict_idle(&self) {
        let mut pools = self.pools.write().await;
        let mut evicted = 0;

        for (_, pool) in pools.iter_mut() {
            let before = pool.connections.len();
            pool.connections.retain(|conn| {
                let idle_time = chrono::Utc::now()
                    .signed_duration_since(conn.last_used)
                    .to_std()
                    .unwrap_or(Duration::ZERO);

                idle_time < self.config.idle_timeout
            });
            evicted += before - pool.connections.len();
        }

        if evicted > 0 {
            self.total_connections
                .fetch_sub(evicted as u64, Ordering::SeqCst);
            tracing::debug!("Evicted {} idle connections", evicted);
        }
    }

    /// Get total connections
    pub async fn total_connections(&self) -> usize {
        self.total_connections.load(Ordering::SeqCst) as usize
    }

    /// Get active connections
    pub async fn active_connections(&self) -> usize {
        self.active_connections.load(Ordering::SeqCst) as usize
    }

    /// Get pool metrics
    pub async fn metrics(&self) -> PoolMetrics {
        self.metrics.snapshot()
    }

    /// Get per-node statistics
    pub async fn node_stats(&self, node_id: &NodeId) -> Option<NodePoolStats> {
        let pools = self.pools.read().await;
        pools.get(node_id).map(|pool| NodePoolStats {
            idle_connections: pool
                .connections
                .iter()
                .filter(|c| c.state == ConnectionState::Idle)
                .count(),
            total_created: pool.total_created,
            total_closed: pool.total_closed,
        })
    }
}

/// Per-node pool statistics
#[derive(Debug, Clone)]
pub struct NodePoolStats {
    /// Number of idle connections
    pub idle_connections: usize,
    /// Total connections created for this node
    pub total_created: u64,
    /// Total connections closed for this node
    pub total_closed: u64,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_pool_config_default() {
        let config = PoolConfig::default();
        assert_eq!(config.min_connections, 2);
        assert_eq!(config.max_connections, 10);
        assert!(config.test_on_acquire);
    }

    #[tokio::test]
    async fn test_add_remove_node() {
        let pool = ConnectionPool::new(PoolConfig::default());
        let node_id = NodeId::new();

        pool.add_node(node_id).await;
        assert!(pool.node_stats(&node_id).await.is_some());

        pool.remove_node(&node_id).await;
        assert!(pool.node_stats(&node_id).await.is_none());
    }

    #[tokio::test]
    async fn test_get_return_connection() {
        let pool = ConnectionPool::new(PoolConfig::default());
        let node_id = NodeId::new();

        pool.add_node(node_id).await;

        // Get connection
        let conn = pool.get_connection(&node_id).await.expect("get failed");
        assert_eq!(conn.node_id, node_id);
        assert_eq!(conn.state, ConnectionState::InUse);
        assert_eq!(pool.active_connections().await, 1);

        // Return connection
        pool.return_connection(conn).await;
        assert_eq!(pool.active_connections().await, 0);
    }

    #[tokio::test]
    async fn test_metrics() {
        let pool = ConnectionPool::new(PoolConfig::default());
        let node_id = NodeId::new();

        pool.add_node(node_id).await;

        let conn = pool.get_connection(&node_id).await.expect("get failed");
        pool.return_connection(conn).await;

        let metrics = pool.metrics().await;
        assert_eq!(metrics.acquires, 1);
        assert_eq!(metrics.connections_created, 1);
    }

    /// Regression: `max_connections` must be enforced while connections are
    /// in use. Before the permit fix, the semaphore permit was released
    /// immediately at the end of `get_connection`, so the pool could hand out
    /// an unlimited number of simultaneously-active connections.
    #[tokio::test]
    async fn test_max_connections_enforced_while_in_use() {
        let pool = ConnectionPool::new(PoolConfig {
            min_connections: 0,
            max_connections: 2,
            acquire_timeout: Duration::from_millis(50),
            ..Default::default()
        });
        let node_id = NodeId::new();
        pool.add_node(node_id).await;

        let c1 = pool.get_connection(&node_id).await.expect("first acquire");
        let c2 = pool.get_connection(&node_id).await.expect("second acquire");

        // Third acquire must fail with a Timeout — the two permits are held
        // by c1 and c2 and have not been released.
        let err = pool
            .get_connection(&node_id)
            .await
            .expect_err("third acquire should time out while c1/c2 held");
        assert!(
            matches!(err, ProxyError::Timeout(_)),
            "expected Timeout, got {err:?}"
        );

        // Dropping c1 releases its permit; the next acquire must succeed.
        drop(c1);
        let _c3 = pool
            .get_connection(&node_id)
            .await
            .expect("acquire should succeed after c1 dropped");

        // Keep c2 alive through the end of the test.
        drop(c2);
    }

    /// `with_backend_template` + `add_node_with_endpoint` must actually
    /// attempt a real connect at `create_connection` time — proven by
    /// pointing at a deliberately-unreachable address and verifying the
    /// acquire surfaces a Connection error containing the node id.
    #[tokio::test]
    async fn test_backend_template_with_unreachable_endpoint_errors() {
        use crate::backend::{tls::default_client_config, TlsMode};

        let template = BackendConfig {
            host: "placeholder".into(),
            port: 0,
            user: "postgres".into(),
            password: None,
            database: None,
            application_name: Some("helios-pool".into()),
            tls_mode: TlsMode::Disable,
            connect_timeout: Duration::from_millis(200),
            query_timeout: Duration::from_millis(200),
            tls_config: default_client_config(),
        };

        let pool = ConnectionPool::new(PoolConfig {
            max_connections: 2,
            acquire_timeout: Duration::from_millis(300),
            ..Default::default()
        })
        .with_backend_template(template);

        let node_id = NodeId::new();
        // 127.0.0.1:1 — no daemon, so TCP connect refuses.
        pool.add_node_with_endpoint(node_id, "127.0.0.1", 1).await;

        let err = pool
            .get_connection(&node_id)
            .await
            .expect_err("acquire must fail when backend is unreachable");
        match err {
            ProxyError::Connection(msg) => {
                assert!(
                    msg.contains("backend connect"),
                    "expected backend-connect error, got {}",
                    msg
                );
            }
            other => panic!("expected Connection error, got {:?}", other),
        }
    }

    /// Without a backend template, `add_node_with_endpoint` still works
    /// — the resulting connection has `client: None`, same as the
    /// pre-T0-TR1 skeleton. Preserves test ergonomics for callers that
    /// don't want real network I/O.
    #[tokio::test]
    async fn test_add_node_with_endpoint_but_no_template_returns_skeleton_client() {
        let pool = ConnectionPool::new(PoolConfig::default());
        let node_id = NodeId::new();
        pool.add_node_with_endpoint(node_id, "127.0.0.1", 5432).await;

        let conn = pool.get_connection(&node_id).await.expect("acquire");
        assert!(conn.client.is_none(), "no template → no live client");
    }

    /// Returning a connection to the pool keeps the permit attached, so
    /// reusing it should not consume a new permit.
    #[tokio::test]
    async fn test_return_then_reacquire_reuses_permit() {
        let pool = ConnectionPool::new(PoolConfig {
            min_connections: 0,
            max_connections: 1,
            acquire_timeout: Duration::from_millis(50),
            ..Default::default()
        });
        let node_id = NodeId::new();
        pool.add_node(node_id).await;

        let c1 = pool.get_connection(&node_id).await.expect("first acquire");
        pool.return_connection(c1).await;

        // Pool now has one idle connection. Re-acquire must succeed without
        // creating a new connection (and without timing out).
        let c2 = pool.get_connection(&node_id).await.expect("reacquire");
        assert!(c2.permit.is_some(), "reused connection must carry its permit");

        let metrics = pool.metrics().await;
        assert_eq!(
            metrics.connections_created, 1,
            "reuse must not create a second connection"
        );
    }
}