Skip to main content

opencode_cloud_core/docker/
health.rs

1//! Health check module for OpenCode service
2//!
3//! Provides health checking functionality by querying OpenCode's /global/health endpoint.
4
5use serde::{Deserialize, Serialize};
6use std::time::Duration;
7use thiserror::Error;
8
9use super::DockerClient;
10
11/// Response from OpenCode's /global/health endpoint
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct HealthResponse {
14    /// Whether the service is healthy
15    pub healthy: bool,
16    /// Service version string
17    pub version: String,
18}
19
20/// Extended health response including container stats
21#[derive(Debug, Serialize)]
22pub struct ExtendedHealthResponse {
23    /// Whether the service is healthy
24    pub healthy: bool,
25    /// Service version string
26    pub version: String,
27    /// Container state (running, stopped, etc.)
28    pub container_state: String,
29    /// Uptime in seconds
30    pub uptime_seconds: u64,
31    /// Memory usage in megabytes (if available)
32    #[serde(skip_serializing_if = "Option::is_none")]
33    pub memory_usage_mb: Option<u64>,
34}
35
36/// Errors that can occur during health checks
37#[derive(Debug, Error)]
38pub enum HealthError {
39    /// HTTP request failed
40    #[error("Request failed: {0}")]
41    RequestError(#[from] reqwest::Error),
42
43    /// Service returned non-200 status
44    #[error("Service unhealthy (HTTP {0})")]
45    Unhealthy(u16),
46
47    /// Connection refused - service may not be running
48    #[error("Connection refused - service may not be running")]
49    ConnectionRefused,
50
51    /// Request timed out - service may be starting
52    #[error("Timeout - service may be starting")]
53    Timeout,
54}
55
56/// Check health by querying OpenCode's /global/health endpoint
57///
58/// Returns the health response on success (HTTP 200).
59/// Returns an error for connection issues, timeouts, or non-200 responses.
60pub async fn check_health(port: u16) -> Result<HealthResponse, HealthError> {
61    let url = format!("http://127.0.0.1:{port}/global/health");
62
63    let client = reqwest::Client::builder()
64        .timeout(Duration::from_secs(5))
65        .build()?;
66
67    let response = match client.get(&url).send().await {
68        Ok(resp) => resp,
69        Err(e) => {
70            // Check for connection refused
71            if e.is_connect() {
72                return Err(HealthError::ConnectionRefused);
73            }
74            // Check for timeout
75            if e.is_timeout() {
76                return Err(HealthError::Timeout);
77            }
78            return Err(HealthError::RequestError(e));
79        }
80    };
81
82    let status = response.status();
83
84    if status.is_success() {
85        let health_response = response.json::<HealthResponse>().await?;
86        Ok(health_response)
87    } else {
88        Err(HealthError::Unhealthy(status.as_u16()))
89    }
90}
91
92/// Check health with extended information including container stats
93///
94/// Combines basic health check with container statistics from Docker.
95/// If container stats fail, still returns response with container_state = "unknown".
96pub async fn check_health_extended(
97    client: &DockerClient,
98    port: u16,
99) -> Result<ExtendedHealthResponse, HealthError> {
100    // Get basic health info
101    let health = check_health(port).await?;
102
103    // Get container stats
104    let container_name = super::CONTAINER_NAME;
105
106    // Try to get container info
107    let (container_state, uptime_seconds, memory_usage_mb) =
108        match client.inner().inspect_container(container_name, None).await {
109            Ok(info) => {
110                let state = info
111                    .state
112                    .as_ref()
113                    .and_then(|s| s.status.as_ref())
114                    .map(|s| s.to_string())
115                    .unwrap_or_else(|| "unknown".to_string());
116
117                // Calculate uptime
118                let uptime = info
119                    .state
120                    .as_ref()
121                    .and_then(|s| s.started_at.as_ref())
122                    .and_then(|started| {
123                        let timestamp = chrono::DateTime::parse_from_rfc3339(started).ok()?;
124                        let now = chrono::Utc::now();
125                        let started_utc = timestamp.with_timezone(&chrono::Utc);
126                        if now >= started_utc {
127                            Some((now - started_utc).num_seconds() as u64)
128                        } else {
129                            None
130                        }
131                    })
132                    .unwrap_or(0);
133
134                // Get memory usage (would require stats API call - skip for now)
135                let memory = None;
136
137                (state, uptime, memory)
138            }
139            Err(_) => ("unknown".to_string(), 0, None),
140        };
141
142    Ok(ExtendedHealthResponse {
143        healthy: health.healthy,
144        version: health.version,
145        container_state,
146        uptime_seconds,
147        memory_usage_mb,
148    })
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Probing a port where nothing is expected to listen must map to
    /// `HealthError::ConnectionRefused`.
    #[tokio::test]
    async fn test_health_check_connection_refused() {
        // Port 1 should always refuse connection
        let outcome = check_health(1).await;
        assert!(outcome.is_err());

        let other = outcome.unwrap_err();
        assert!(
            matches!(other, HealthError::ConnectionRefused),
            "Expected ConnectionRefused, got: {other:?}"
        );
    }
}
165}