// geist_supervisor 0.1.28
//
// Generic OTA supervisor for field devices
// Documentation
use anyhow::Result;
use std::time::Duration;
use tracing::{debug, info, warn};

/// Service for checking the health of another service.
///
/// Holds a blocking HTTP client plus the retry policy that `check_health`
/// applies when probing `health_check_url`.
pub struct HealthCheckService {
    // Blocking HTTP client used to send the GET probes; `new` builds it with a
    // 10-second request timeout.
    client: reqwest::blocking::Client,
    // URL that `check_health` requests on every attempt.
    health_check_url: String,
    // Attempt budget for `check_health`: it is used as the total number of
    // attempts, not as extra retries after a first try.
    retries: u32,
    // How long `check_health` sleeps between two consecutive attempts.
    delay: Duration,
}

impl HealthCheckService {
    /// Create a new `HealthCheckService`.
    ///
    /// * `health_check_url` - URL that [`check_health`](Self::check_health)
    ///   sends its GET requests to.
    /// * `retries` - total number of attempts to make. A value of `0` is
    ///   treated as `1`, so the endpoint is always probed at least once.
    /// * `delay` - pause between two consecutive attempts.
    ///
    /// The underlying HTTP client is configured with a 10-second request
    /// timeout.
    ///
    /// # Panics
    ///
    /// Panics if the HTTP client cannot be built.
    pub fn new(health_check_url: String, retries: u32, delay: Duration) -> Self {
        let client = reqwest::blocking::Client::builder()
            .timeout(Duration::from_secs(10))
            .build()
            .expect("Failed to create HTTP client");

        Self {
            client,
            health_check_url,
            retries,
            delay,
        }
    }

    /// Check the health of the service, with retries.
    ///
    /// Sends a GET request to the configured URL and returns `Ok(())` as soon
    /// as one attempt answers with a success (2xx) status. Failed attempts are
    /// logged and followed by a sleep of `delay`, except after the last one.
    ///
    /// # Errors
    ///
    /// Returns an error if no attempt produced a success status.
    pub fn check_health(&self) -> Result<()> {
        info!("Performing health check at: {}", self.health_check_url);

        // A budget of zero would skip the loop entirely and report a failure
        // without ever contacting the service, so always probe at least once.
        let attempts = self.retries.max(1);

        for attempt in 1..=attempts {
            info!("Health check attempt {}/{}", attempt, attempts);

            match self.client.get(&self.health_check_url).send() {
                Ok(response) => {
                    let status = response.status();
                    if status.is_success() {
                        info!("Health check successful!");
                        return Ok(());
                    }
                    warn!(
                        "Health check failed with status: {} (attempt {})",
                        status, attempt
                    );

                    // Log response body for debugging
                    if let Ok(body) = response.text() {
                        debug!("Response body: {}", body);
                    }
                }
                Err(e) => {
                    if e.is_connect() {
                        warn!(
                            "Health check connection failed (attempt {}): {}",
                            attempt, e
                        );
                        debug!("This usually means the service isn't listening on the expected port yet");
                    } else if e.is_timeout() {
                        warn!("Health check timed out (attempt {}): {}", attempt, e);
                    } else {
                        warn!("Health check request failed (attempt {}): {}", attempt, e);
                    }
                }
            }

            // No point in waiting after the final attempt.
            if attempt < attempts {
                info!("Waiting {:?} before next health check", self.delay);
                std::thread::sleep(self.delay);
            }
        }

        anyhow::bail!(
            "Service failed to become healthy after {} attempts at {}",
            attempts,
            self.health_check_url
        )
    }
}