// reasonkit-core 0.1.8
//
// The Reasoning Engine — Auditable Reasoning for Production AI | Rust-Native | Turn Prompts into Protocols
//! Health Monitoring for Daemon Mode
//!
//! Monitors MCP server health and connection pool status.
//! Works independently of the mcp-server-pro registry.

use super::pool::ConnectionPool;
use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, info, warn};

/// Coarse health classification, used both for individual servers and for
/// the daemon as a whole.
///
/// The enum carries no data, so it is `Copy` (callers can pass it by value
/// without cloning) and `Hash` (it can be used as a map or set key, e.g. to
/// count servers per status).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HealthStatus {
    /// Server is healthy and responding
    Healthy,
    /// Server is degraded but functional
    Degraded,
    /// Server is unhealthy or not responding
    Unhealthy,
    /// Server status is unknown (for example, nothing has been observed yet)
    Unknown,
}

/// Health check result for a single server.
///
/// A plain data record: every field is public and the type only derives
/// `Debug` and `Clone`.
#[derive(Debug, Clone)]
pub struct ServerHealth {
    /// Name identifying the server this record describes.
    pub name: String,
    /// Health classification of the server at the time of the check.
    pub status: HealthStatus,
    /// Number of connections counted as active for this server.
    pub active_connections: usize,
    /// Monotonic timestamp of the last successful call, or `None` if no
    /// successful call has been recorded.
    pub last_success: Option<std::time::Instant>,
}

/// Daemon-wide health summary.
///
/// Combines an overall status with per-server records and the connection
/// pool counters the status was derived from.
#[derive(Debug, Clone)]
pub struct DaemonHealth {
    /// Overall daemon health.
    pub status: HealthStatus,
    /// Health of individual servers; may be empty when only the pool-level
    /// summary was computed.
    pub servers: Vec<ServerHealth>,
    /// Number of connection pool hits.
    pub pool_hits: u64,
    /// Number of connection pool misses.
    pub pool_misses: u64,
    /// Total number of calls processed.
    pub total_calls: u64,
}

/// Health monitor for daemon mode.
///
/// Holds a shared handle to the connection pool and the period at which the
/// monitoring loop samples it.
pub struct HealthMonitor {
    /// Time between two consecutive health checks.
    interval: Duration,
    /// Shared handle to the connection pool whose statistics are inspected.
    pool: Arc<ConnectionPool>,
}

impl HealthMonitor {
    /// Period used by [`run`](Self::run) when the configured interval is zero.
    ///
    /// `tokio::time::interval` panics on a zero period, so the loop never
    /// ticks faster than this fallback in that case.
    const MIN_TICK_PERIOD: Duration = Duration::from_secs(1);

    /// Creates a health monitor over `pool` that checks every
    /// `interval_secs` seconds.
    ///
    /// The interval is stored as given; a value of `0` is accepted here and
    /// handled by [`run`](Self::run).
    pub fn new(pool: Arc<ConnectionPool>, interval_secs: u64) -> Self {
        Self {
            interval: Duration::from_secs(interval_secs),
            pool,
        }
    }

    /// Returns a snapshot of the current daemon health.
    ///
    /// The overall status is derived purely from the pool counters; the
    /// `servers` list of the returned value is always empty.
    pub fn get_health(&self) -> DaemonHealth {
        let stats = self.pool.stats();

        // Classify overall health from the pool counters:
        //   Unknown:  no calls have been recorded yet
        //   Healthy:  hits are at least as many as misses
        //   Degraded: misses outnumber hits
        // `Unhealthy` is never produced by this method.
        let status = if stats.total_calls == 0 {
            HealthStatus::Unknown
        } else if stats.cache_hits >= stats.cache_misses {
            HealthStatus::Healthy
        } else {
            HealthStatus::Degraded
        };

        DaemonHealth {
            status,
            servers: Vec::new(), // Populated by full health check
            pool_hits: stats.cache_hits,
            pool_misses: stats.cache_misses,
            total_calls: stats.total_calls,
        }
    }

    /// Runs the health monitoring loop.
    ///
    /// On every tick the current health is sampled with
    /// [`get_health`](Self::get_health) and logged: `debug` for healthy and
    /// idle states, `warn` for degraded and unhealthy ones. The first tick
    /// completes immediately.
    ///
    /// This future never completes on its own; it is meant to be spawned as
    /// a background task and cancelled or dropped by the owner.
    ///
    /// A zero interval would make `tokio::time::interval` panic, so in that
    /// case the loop falls back to a one-second period and logs a warning.
    pub async fn run(&self) {
        let period = if self.interval.is_zero() {
            warn!(
                "Daemon health monitor interval is zero; falling back to {:?}",
                Self::MIN_TICK_PERIOD
            );
            Self::MIN_TICK_PERIOD
        } else {
            self.interval
        };

        info!("Daemon health monitor started (interval: {:?})", period);

        let mut interval = tokio::time::interval(period);

        loop {
            interval.tick().await;

            let health = self.get_health();

            match health.status {
                HealthStatus::Healthy => {
                    debug!(
                        "Daemon healthy: {} calls, {:.1}% hit rate",
                        health.total_calls,
                        // Guard against division by zero even though a
                        // healthy status implies at least one call.
                        if health.total_calls > 0 {
                            (health.pool_hits as f64 / health.total_calls as f64) * 100.0
                        } else {
                            0.0
                        }
                    );
                }
                HealthStatus::Degraded => {
                    warn!(
                        "Daemon degraded: {} calls, {} pool misses",
                        health.total_calls, health.pool_misses
                    );
                }
                HealthStatus::Unhealthy => {
                    warn!("Daemon unhealthy!");
                }
                HealthStatus::Unknown => {
                    debug!("Daemon idle, no calls yet");
                }
            }
        }
    }

    /// Returns the configured check interval, exactly as passed to
    /// [`new`](Self::new).
    pub fn interval(&self) -> Duration {
        self.interval
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a monitor over a fresh connection pool with the given
    /// interval in seconds.
    fn monitor_with_interval(secs: u64) -> HealthMonitor {
        HealthMonitor::new(Arc::new(ConnectionPool::new()), secs)
    }

    /// Equal variants compare equal; different variants do not.
    #[test]
    fn test_health_status_eq() {
        let (up, down) = (HealthStatus::Healthy, HealthStatus::Unhealthy);
        assert_eq!(up, HealthStatus::Healthy);
        assert_ne!(up, down);
    }

    /// A hand-built summary keeps the status it was given.
    #[test]
    fn test_daemon_health_creation() {
        let snapshot = DaemonHealth {
            total_calls: 0,
            pool_misses: 0,
            pool_hits: 0,
            servers: vec![],
            status: HealthStatus::Unknown,
        };
        assert_eq!(snapshot.status, HealthStatus::Unknown);
    }

    /// The constructor converts whole seconds into the stored interval.
    #[tokio::test]
    async fn test_health_monitor_creation() {
        let expected = Duration::from_secs(30);
        assert_eq!(monitor_with_interval(30).interval(), expected);
    }

    /// A monitor over an unused pool reports `Unknown` and zero calls.
    #[tokio::test]
    async fn test_get_health_initial() {
        let DaemonHealth {
            status,
            total_calls,
            ..
        } = monitor_with_interval(30).get_health();

        // No calls yet, status should be Unknown
        assert_eq!(status, HealthStatus::Unknown);
        assert_eq!(total_calls, 0);
    }
}