arch_toolkit/
health.rs

1//! Health check functionality for archlinux.org services.
2
3use crate::error::Result;
4use crate::types::{HealthStatus, ServiceStatus};
5use reqwest::Client;
6use std::time::{Duration, Instant};
7use tracing::debug;
8
/// Fallback request timeout for a health check (5 seconds).
///
/// Applied by `check_health` whenever the caller does not pass an explicit timeout.
const DEFAULT_HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(5);

/// Latency cutoff (2 seconds) used to separate a healthy response from a degraded one.
const DEGRADED_LATENCY_THRESHOLD: Duration = Duration::from_secs(2);

/// URL probed by the health check: the AUR RPC v5 `info` endpoint, requested
/// without any package arguments so the request stays as small as possible.
const HEALTH_CHECK_URL: &str = "https://aur.archlinux.org/rpc/v5/info";
17
18/// What: Perform health check against AUR API.
19///
20/// Inputs:
21/// - `client`: HTTP client to use for the request
22/// - `timeout`: Optional timeout override (uses default if None)
23///
24/// Output:
25/// - `Result<HealthStatus>` with service status and latency
26///
27/// Details:
28/// - Uses minimal RPC request (info with no packages)
29/// - Validates response is valid JSON with expected structure
30/// - Measures round-trip latency
31/// - Returns `HealthStatus` with appropriate `ServiceStatus` based on:
32///   - Success + latency < 2s = `Healthy`
33///   - Success + latency >= 2s = `Degraded`
34///   - HTTP error = `Unreachable`
35///   - Timeout = `Timeout`
36///
37/// # Errors
38/// - Never returns an error - always returns `Ok(HealthStatus)` with appropriate status
39/// - Network errors are represented as `ServiceStatus::Unreachable` or `ServiceStatus::Timeout`
40pub async fn check_health(client: &Client, timeout: Option<Duration>) -> Result<HealthStatus> {
41    let start = Instant::now();
42    let checked_at = start;
43
44    let timeout_duration = timeout.unwrap_or(DEFAULT_HEALTH_CHECK_TIMEOUT);
45
46    // Create a request with health-check-specific timeout
47    let result = client
48        .get(HEALTH_CHECK_URL)
49        .timeout(timeout_duration)
50        .send()
51        .await;
52
53    let latency = start.elapsed();
54
55    match result {
56        Ok(response) => {
57            // Check HTTP status
58            if !response.status().is_success() {
59                debug!(
60                    status = %response.status(),
61                    latency_ms = latency.as_millis(),
62                    "health check returned non-success status"
63                );
64                return Ok(HealthStatus {
65                    aur_api: ServiceStatus::Unreachable,
66                    latency: Some(latency),
67                    checked_at,
68                });
69            }
70
71            // Validate response body is valid JSON
72            match response.json::<serde_json::Value>().await {
73                Ok(json) => {
74                    // Verify it's a valid AUR RPC response
75                    let is_valid = json.get("version").is_some() && json.get("type").is_some();
76
77                    if !is_valid {
78                        debug!(
79                            latency_ms = latency.as_millis(),
80                            "health check response missing expected fields"
81                        );
82                        return Ok(HealthStatus {
83                            aur_api: ServiceStatus::Degraded,
84                            latency: Some(latency),
85                            checked_at,
86                        });
87                    }
88
89                    // Determine status based on latency
90                    let status = if latency > DEGRADED_LATENCY_THRESHOLD {
91                        ServiceStatus::Degraded
92                    } else {
93                        ServiceStatus::Healthy
94                    };
95
96                    debug!(
97                        latency_ms = latency.as_millis(),
98                        ?status,
99                        "health check completed"
100                    );
101
102                    Ok(HealthStatus {
103                        aur_api: status,
104                        latency: Some(latency),
105                        checked_at,
106                    })
107                }
108                Err(e) => {
109                    debug!(
110                        error = %e,
111                        latency_ms = latency.as_millis(),
112                        "health check failed to parse response"
113                    );
114                    Ok(HealthStatus {
115                        aur_api: ServiceStatus::Degraded,
116                        latency: Some(latency),
117                        checked_at,
118                    })
119                }
120            }
121        }
122        Err(e) => {
123            let status = if e.is_timeout() {
124                ServiceStatus::Timeout
125            } else {
126                ServiceStatus::Unreachable
127            };
128
129            debug!(
130                error = %e,
131                ?status,
132                latency_ms = latency.as_millis(),
133                "health check failed"
134            );
135
136            Ok(HealthStatus {
137                aur_api: status,
138                latency: Some(latency),
139                checked_at,
140            })
141        }
142    }
143}