arch_toolkit/health.rs
1//! Health check functionality for archlinux.org services.
2
3use crate::error::Result;
4use crate::types::{HealthStatus, ServiceStatus};
5use reqwest::Client;
6use std::time::{Duration, Instant};
7use tracing::debug;
8
9/// Health check endpoint: the AUR RPC v5 `info` route, requested without any package arguments.
10const HEALTH_CHECK_URL: &str = "https://aur.archlinux.org/rpc/v5/info";
11
12/// Default timeout for health checks, used when the caller supplies no override (shorter than regular operations).
13const DEFAULT_HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(5);
14
15/// Latency threshold (2 seconds) separating a `Healthy` result from a `Degraded` one on an otherwise successful check.
16const DEGRADED_LATENCY_THRESHOLD: Duration = Duration::from_secs(2);
17
18/// What: Perform health check against AUR API.
19///
20/// Inputs:
21/// - `client`: HTTP client to use for the request
22/// - `timeout`: Optional timeout override (uses default if None)
23///
24/// Output:
25/// - `Result<HealthStatus>` with service status and latency
26///
27/// Details:
28/// - Uses minimal RPC request (info with no packages)
29/// - Validates response is valid JSON with expected structure
30/// - Measures round-trip latency
31/// - Returns `HealthStatus` with appropriate `ServiceStatus` based on:
32/// - Success + latency < 2s = `Healthy`
33/// - Success + latency >= 2s = `Degraded`
34/// - HTTP error = `Unreachable`
35/// - Timeout = `Timeout`
36///
37/// # Errors
38/// - Never returns an error - always returns `Ok(HealthStatus)` with appropriate status
39/// - Network errors are represented as `ServiceStatus::Unreachable` or `ServiceStatus::Timeout`
40pub async fn check_health(client: &Client, timeout: Option<Duration>) -> Result<HealthStatus> {
41 let start = Instant::now();
42 let checked_at = start;
43
44 let timeout_duration = timeout.unwrap_or(DEFAULT_HEALTH_CHECK_TIMEOUT);
45
46 // Create a request with health-check-specific timeout
47 let result = client
48 .get(HEALTH_CHECK_URL)
49 .timeout(timeout_duration)
50 .send()
51 .await;
52
53 let latency = start.elapsed();
54
55 match result {
56 Ok(response) => {
57 // Check HTTP status
58 if !response.status().is_success() {
59 debug!(
60 status = %response.status(),
61 latency_ms = latency.as_millis(),
62 "health check returned non-success status"
63 );
64 return Ok(HealthStatus {
65 aur_api: ServiceStatus::Unreachable,
66 latency: Some(latency),
67 checked_at,
68 });
69 }
70
71 // Validate response body is valid JSON
72 match response.json::<serde_json::Value>().await {
73 Ok(json) => {
74 // Verify it's a valid AUR RPC response
75 let is_valid = json.get("version").is_some() && json.get("type").is_some();
76
77 if !is_valid {
78 debug!(
79 latency_ms = latency.as_millis(),
80 "health check response missing expected fields"
81 );
82 return Ok(HealthStatus {
83 aur_api: ServiceStatus::Degraded,
84 latency: Some(latency),
85 checked_at,
86 });
87 }
88
89 // Determine status based on latency
90 let status = if latency > DEGRADED_LATENCY_THRESHOLD {
91 ServiceStatus::Degraded
92 } else {
93 ServiceStatus::Healthy
94 };
95
96 debug!(
97 latency_ms = latency.as_millis(),
98 ?status,
99 "health check completed"
100 );
101
102 Ok(HealthStatus {
103 aur_api: status,
104 latency: Some(latency),
105 checked_at,
106 })
107 }
108 Err(e) => {
109 debug!(
110 error = %e,
111 latency_ms = latency.as_millis(),
112 "health check failed to parse response"
113 );
114 Ok(HealthStatus {
115 aur_api: ServiceStatus::Degraded,
116 latency: Some(latency),
117 checked_at,
118 })
119 }
120 }
121 }
122 Err(e) => {
123 let status = if e.is_timeout() {
124 ServiceStatus::Timeout
125 } else {
126 ServiceStatus::Unreachable
127 };
128
129 debug!(
130 error = %e,
131 ?status,
132 latency_ms = latency.as_millis(),
133 "health check failed"
134 );
135
136 Ok(HealthStatus {
137 aur_api: status,
138 latency: Some(latency),
139 checked_at,
140 })
141 }
142 }
143}