arcly_http/observability/plugin.rs
1//! `ArclyObservabilityPlugin` — wires up all four observability backends in one
2//! plugin that drops into any arcly-http application.
3//!
4//! ## What it sets up
5//! - **Structured JSON logs** via `tracing-subscriber` (respects `RUST_LOG`).
6//! - **Prometheus metrics** at `GET /metrics`.
7//! - **OTLP distributed traces** exported to the configured gRPC endpoint.
8//! - **Health / readiness probes** at `GET /healthz` and `GET /readyz`.
9//!
10//! ## Usage
//! ```ignore
//! App::launch_with_plugins::<AppModule>(
//!     "0.0.0.0:3000",
//!     OpenApiInfo { /* ... */ },
//!     vec![
//!         Box::new(ArclyObservabilityPlugin {
//!             service_name: "my-service",
//!             service_version: env!("CARGO_PKG_VERSION"),
//!             otlp_endpoint: "http://localhost:4317",
//!         }),
//!     ],
//! ).await
//! ```
23
24use futures::future::BoxFuture;
25use metrics_exporter_prometheus::PrometheusHandle;
26
27use crate::core::engine::{FrozenDiContainer, HttpMethod};
28use crate::core::plugins::{ArclyPlugin, ArclyPluginContext, PluginError};
29
/// Drop-in observability plugin. Add to `App::launch_with_plugins` to get
/// structured logs, Prometheus metrics, OTLP traces, and health endpoints
/// with zero changes to handler code.
///
/// All fields are `&'static str`, so the configuration is cheap to clone and
/// can be printed with `{:?}` when debugging start-up.
#[derive(Debug, Clone)]
pub struct ArclyObservabilityPlugin {
    /// Service name passed to the OTLP tracer configuration and included in
    /// the plugin's start-up log line.
    pub service_name: &'static str,
    /// Service version passed to the OTLP tracer configuration and included
    /// in the plugin's start-up log line.
    pub service_version: &'static str,
    /// gRPC endpoint of the OTLP collector.
    /// Example: `"http://localhost:4317"`
    pub otlp_endpoint: &'static str,
}
40
41impl ArclyPlugin for ArclyObservabilityPlugin {
42 fn name(&self) -> &'static str {
43 "ArclyObservabilityPlugin"
44 }
45
46 fn on_init<'a>(
47 &'a mut self,
48 ctx: &'a mut ArclyPluginContext,
49 ) -> BoxFuture<'a, Result<(), PluginError>> {
50 Box::pin(async move {
51 // ── 1. Structured JSON logging ─────────────────────────────────
52 use tracing_subscriber::{fmt, EnvFilter};
53 let _ = fmt()
54 .json()
55 .with_env_filter(
56 EnvFilter::try_from_default_env()
57 // Suppress opentelemetry_sdk export-failure spam when no
58 // OTLP collector is running (common in dev / CI).
59 // Override: RUST_LOG=info,opentelemetry_sdk=warn
60 .unwrap_or_else(|_| EnvFilter::new("info,opentelemetry_sdk=off")),
61 )
62 .try_init(); // silently ignore "already set" errors
63
64 // ── 2. Prometheus metrics ──────────────────────────────────────
65 let handle: PrometheusHandle = crate::observability::metrics::init_metrics();
66
67 // Provide the handle into DI so the /metrics handler can render it.
68 ctx.provide::<PrometheusHandle>(handle.clone());
69
70 // Register GET /metrics route.
71 let metrics_handler = crate::observability::metrics::metrics_route_handler(handle);
72 ctx.add_route(HttpMethod::GET, "/metrics", metrics_handler);
73
74 // ── 3. OpenTelemetry OTLP traces ───────────────────────────────
75 crate::observability::otel::init_tracer(&crate::observability::otel::OtelConfig {
76 service_name: self.service_name,
77 service_version: self.service_version,
78 otlp_endpoint: self.otlp_endpoint,
79 });
80
81 // ── 4. Health / readiness endpoints ────────────────────────────
82 // Initialise the global registry now so it's ready for other plugins
83 // to register checks in their own on_init.
84 let _ = crate::observability::health::global();
85
86 ctx.add_route(
87 HttpMethod::GET,
88 "/healthz",
89 crate::observability::health::healthz_handler(),
90 );
91 ctx.add_route(
92 HttpMethod::GET,
93 "/readyz",
94 crate::observability::health::readyz_handler(),
95 );
96
97 // ── 5. Inject plugin routes into the OpenAPI spec ─────────────────
98 // Plugin routes are added to the axum router directly (not via the
99 // `inventory` system that macro-routes use), so `build_spec` does not
100 // see them. The `modify_openapi` mutator runs after `build_spec` and
101 // patches the paths object, making all three endpoints visible in the
102 // Swagger UI at /docs.
103 ctx.modify_openapi(|spec| {
104 let Some(paths) = spec
105 .as_object_mut()
106 .and_then(|m| m.get_mut("paths"))
107 .and_then(|v| v.as_object_mut())
108 else { return };
109
110 // Shared schema for /healthz and /readyz responses.
111 let health_schema = serde_json::json!({
112 "type": "object",
113 "required": ["status", "checks", "uptime_secs"],
114 "properties": {
115 "status": {
116 "type": "string",
117 "enum": ["healthy", "degraded", "unhealthy"],
118 "description": "Overall health of the service."
119 },
120 "checks": {
121 "type": "object",
122 "description": "Per-subsystem health results.",
123 "additionalProperties": {
124 "oneOf": [
125 { "type": "string", "enum": ["healthy"] },
126 { "type": "object", "required": ["degraded"], "properties": { "degraded": { "type": "string" } } },
127 { "type": "object", "required": ["unhealthy"], "properties": { "unhealthy": { "type": "string" } } }
128 ]
129 }
130 },
131 "uptime_secs": {
132 "type": "integer",
133 "format": "int64",
134 "description": "Process uptime in seconds."
135 }
136 }
137 });
138
139 paths.insert("/healthz".to_string(), serde_json::json!({
140 "get": {
141 "summary": "Liveness probe",
142 "description": "Returns 200 when the service is healthy or degraded, \
143 503 when any registered health check is Unhealthy.",
144 "operationId": "healthz",
145 "tags": ["observability"],
146 "responses": {
147 "200": {
148 "description": "Healthy or degraded — service is alive.",
149 "content": { "application/json": { "schema": health_schema } }
150 },
151 "503": {
152 "description": "Unhealthy — one or more subsystems have failed."
153 }
154 }
155 }
156 }));
157
158 paths.insert("/readyz".to_string(), serde_json::json!({
159 "get": {
160 "summary": "Readiness probe",
161 "description": "Identical to /healthz. Use for Kubernetes `readinessProbe`; \
162 load-balancers should stop sending traffic when this returns 503.",
163 "operationId": "readyz",
164 "tags": ["observability"],
165 "responses": {
166 "200": { "description": "Ready to serve traffic." },
167 "503": { "description": "Not ready — remove from load-balancer rotation." }
168 }
169 }
170 }));
171
172 paths.insert("/metrics".to_string(), serde_json::json!({
173 "get": {
174 "summary": "Prometheus metrics scrape",
175 "description": "Returns Prometheus text-format (version 0.0.4) metrics. \
176 Counters: `http_requests_total`. \
177 Histogram: `http_request_duration_seconds`. \
178 Gauge: `http_requests_in_flight`.",
179 "operationId": "metrics",
180 "tags": ["observability"],
181 "responses": {
182 "200": {
183 "description": "Prometheus text-format metric lines.",
184 "content": {
185 "text/plain": {
186 "schema": { "type": "string" }
187 }
188 }
189 }
190 }
191 }
192 }));
193 });
194
195 tracing::info!(
196 service = self.service_name,
197 version = self.service_version,
198 otlp = self.otlp_endpoint,
199 "ArclyObservabilityPlugin initialised"
200 );
201
202 Ok(())
203 })
204 }
205
206 fn on_shutdown<'a>(
207 &'a self,
208 _container: &'static FrozenDiContainer,
209 ) -> BoxFuture<'a, Result<(), PluginError>> {
210 Box::pin(async move {
211 opentelemetry::global::shutdown_tracer_provider();
212 Ok(())
213 })
214 }
215}