use anyhow::{Result, anyhow};
use axum::{Router, extract::State, http::StatusCode, response::IntoResponse, routing::get};
use prometheus::{HistogramVec, IntCounterVec, IntGaugeVec, Registry, histogram_opts, opts};
use std::sync::Arc;
use tokio::net::TcpListener;
use tracing::{debug, error};
/// Prometheus collectors published by the service, together with the registry
/// they are registered in.
///
/// Each collector is a `*Vec` type keyed by a single `service_name` label.
#[derive(Debug)]
pub struct ServiceMetrics {
    /// Registry the `/metrics` handler gathers from.
    registry: Arc<Registry>,
    /// Gauge `epazote_status`: service status (1 = OK, 0 = FAIL).
    pub epazote_status: IntGaugeVec,
    /// Counter `epazote_failures_total`: total number of service failures.
    pub epazote_failures_total: IntCounterVec,
    /// Histogram `epazote_response_time_seconds`: service response time in seconds.
    pub epazote_response_time: HistogramVec,
    /// Gauge `epazote_ssl_cert_expiry_seconds`: seconds until SSL certificate expiration.
    pub epazote_ssl_cert_expiry_seconds: IntGaugeVec,
}
impl ServiceMetrics {
    /// Builds the four service collectors and registers them in a fresh registry.
    ///
    /// # Errors
    ///
    /// Returns an error if any collector cannot be created or if registering it
    /// with the registry fails.
    pub fn new() -> Result<Self> {
        // All collectors are partitioned by the same single label.
        const LABELS: &[&str] = &["service_name"];

        let status_opts = opts!("epazote_status", "Service status (1 = OK, 0 = FAIL)");
        let epazote_status = IntGaugeVec::new(status_opts, LABELS)?;

        let failures_opts = opts!("epazote_failures_total", "Total number of service failures");
        let epazote_failures_total = IntCounterVec::new(failures_opts, LABELS)?;

        let response_time_opts = histogram_opts!(
            "epazote_response_time_seconds",
            "Service response time in seconds"
        );
        let epazote_response_time = HistogramVec::new(response_time_opts, LABELS)?;

        let cert_expiry_opts = opts!(
            "epazote_ssl_cert_expiry_seconds",
            "Number of seconds until SSL certificate expiration"
        );
        let epazote_ssl_cert_expiry_seconds = IntGaugeVec::new(cert_expiry_opts, LABELS)?;

        // The registry receives clones; the originals are kept in the struct so
        // callers can update them directly.
        let registry = Registry::new();
        registry.register(Box::new(epazote_status.clone()))?;
        registry.register(Box::new(epazote_failures_total.clone()))?;
        registry.register(Box::new(epazote_response_time.clone()))?;
        registry.register(Box::new(epazote_ssl_cert_expiry_seconds.clone()))?;

        Ok(Self {
            registry: Arc::new(registry),
            epazote_status,
            epazote_failures_total,
            epazote_response_time,
            epazote_ssl_cert_expiry_seconds,
        })
    }
}
/// Serves the Prometheus scrape endpoint (`GET /metrics`) on `port`.
///
/// The listener is bound to the IPv6 wildcard address (`[::]`) first; if that
/// bind fails, the IPv4 wildcard address (`0.0.0.0`) is tried instead.
///
/// # Errors
///
/// Returns an error if the IPv4 fallback bind also fails, or if the server
/// terminates with an error.
pub async fn metrics_server(metrics: Arc<ServiceMetrics>, port: u16) -> Result<()> {
    let app = Router::new()
        .route("/metrics", get(metrics_handler))
        .with_state(metrics);

    // Typed socket addresses are used instead of formatted strings: an IPv6
    // literal such as "::0:<port>" is not a valid socket address (it would need
    // brackets), so binding to it only works by falling back to a host lookup.
    let ipv6_any = std::net::SocketAddr::from((std::net::Ipv6Addr::UNSPECIFIED, port));
    let listener = match TcpListener::bind(ipv6_any).await {
        Ok(listener) => listener,
        Err(e) => {
            let ipv4_any = std::net::SocketAddr::from((std::net::Ipv4Addr::UNSPECIFIED, port));
            // Keep the reason for the IPv6 failure visible rather than discarding it.
            debug!(
                "Could not bind {} ({}), falling back to {}",
                ipv6_any, e, ipv4_any
            );
            TcpListener::bind(ipv4_any).await?
        }
    };
    debug!("Metrics server listening on port {}", port);

    axum::serve(listener, app.into_make_service())
        .await
        .map_err(|e| anyhow!("Server error: {e}"))
}
/// Handler for `GET /metrics`: renders the registry contents with the
/// Prometheus text encoder.
///
/// Responds with `200 OK` and the encoded metrics on success. Responds with
/// `500 Internal Server Error` when the registry gathers no metric families or
/// when encoding fails; both cases are logged at error level.
pub async fn metrics_handler(State(metrics): State<Arc<ServiceMetrics>>) -> impl IntoResponse {
    let families = metrics.registry.gather();

    // Guard: nothing gathered is reported as a server error.
    if families.is_empty() {
        error!("No metrics collected in the registry.");
        let reply = (
            StatusCode::INTERNAL_SERVER_ERROR,
            "No metrics collected in the registry",
        );
        return reply.into_response();
    }

    let mut body = String::new();
    if let Err(e) = prometheus::TextEncoder::new().encode_utf8(&families, &mut body) {
        error!("Failed to encode metrics: {}", e);
        let reply = (
            StatusCode::INTERNAL_SERVER_ERROR,
            "Failed to encode metrics",
        );
        return reply.into_response();
    }

    debug!("Metrics encoded successfully.");
    (StatusCode::OK, body).into_response()
}
// Tests for the metric collectors, driven through the HTTP request/response
// helpers imported from `crate::cli::actions`.
#[cfg(test)]
// Test setup failures are reported by panicking, so `expect`/`unwrap` are allowed here.
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
use super::*;
use crate::cli::{
actions::FallbackState,
actions::client::build_client,
actions::request::{build_http_request, handle_http_response},
config::Config,
};
use mockito::Server;
use std::{collections::HashMap, io::Write, sync::Arc};
use tokio::sync::Mutex;
// Writes `yaml` to a named temporary file and loads it with `Config::new`.
// Panics if creating, writing, flushing, or loading the file fails.
fn create_config(yaml: &str) -> Config {
let mut tmp_file = tempfile::NamedTempFile::new().expect("Failed to create temp file");
tmp_file
.write_all(yaml.as_bytes())
.expect("Failed to write to temp file");
tmp_file.flush().expect("Failed to flush temp file");
Config::new(tmp_file.path().to_path_buf()).expect("Failed to load config")
}
// Sends one request to a mock answering 200 and one to a mock answering 500,
// and checks the `epazote_status` and `epazote_failures_total` values for the
// `test` label after each response has been handled.
#[tokio::test]
async fn test_metrics() {
let mut server = Server::new_async().await;
let mock_url = server.url();
// Configuration with a single service named `test` that points at the mock server.
// NOTE(review): the YAML keys below carry no nesting indentation as shown here;
// confirm the literal still parses into `services.test`.
let yaml = format!(
r"
---
services:
test:
url: {mock_url}/test
every: 30s
expect:
status: 200
"
);
let config = create_config(&yaml);
let service = config.services.get("test").expect("Service not found");
// The result is ignored (presumably because the logger may already be
// initialised by another test — confirm).
let _ = env_logger::try_init();
// First mock: answers 200 to GET /test when the User-Agent matches `epazote.*`.
let mock = server
.mock("GET", "/test")
.match_header(
"User-Agent",
mockito::Matcher::Regex("epazote.*".to_string()),
)
.with_status(200)
.create_async()
.await;
let (builder, _client_config) =
build_client(service).expect("Failed to build client builder");
let client = builder.build().expect("Failed to build client");
let request = build_http_request(&client, service).expect("Failed to build request");
let response = client
.execute(request.build().expect("Failed to build request"))
.await
.expect("Failed to execute request");
// Shared per-service state passed to `handle_http_response`; starts empty.
let counters: Arc<Mutex<HashMap<String, FallbackState>>> =
Arc::new(Mutex::new(HashMap::new()));
let metrics =
Arc::new(ServiceMetrics::new().expect("Failed to initialize service metrics"));
// Baseline values before any response is handled; `map_or(0, ..)` substitutes 0
// if the labelled metric cannot be obtained.
let initial_status = metrics
.epazote_status
.get_metric_with_label_values(&["test"])
.map_or(0, |m| m.get());
let initial_failures = metrics
.epazote_failures_total
.get_metric_with_label_values(&["test"])
.map_or(0, |m| m.get());
let rs = handle_http_response("test", service, response, &metrics, counters.clone()).await;
assert!(rs.is_ok());
// Re-read both metrics after the 200 response has been handled.
let updated_status = metrics
.epazote_status
.get_metric_with_label_values(&["test"])
.map_or(0, |m| m.get());
let updated_failures = metrics
.epazote_failures_total
.get_metric_with_label_values(&["test"])
.map_or(0, |m| m.get());
assert_ne!(
initial_status, updated_status,
"Service status should change after a successful request"
);
assert_eq!(
updated_status, 1,
"Service status should be 1 after a successful request"
);
assert_eq!(
updated_failures, initial_failures,
"Failures should not increase after a successful request"
);
// Swap the 200 mock for one that answers 500 on the same path.
mock.remove();
let _mock = server
.mock("GET", "/test")
.with_status(500)
.create_async()
.await;
let request = build_http_request(&client, service).expect("Failed to build request");
let response = client
.execute(request.build().expect("Failed to build request"))
.await
.expect("Failed to execute request");
// Handling the 500 response must still return `Ok`, carrying `false`.
let rs = handle_http_response("test", service, response, &metrics, counters)
.await
.expect("Failed to handle HTTP response");
assert!(!rs);
let updated_status = metrics
.epazote_status
.get_metric_with_label_values(&["test"])
.map_or(0, |m| m.get());
assert_eq!(
updated_status, 0,
"Service status should be 0 after a failed request"
);
}
}