Skip to main content

arcly_http/observability/
plugin.rs

//! `ArclyObservabilityPlugin` — wires up all four observability backends in one
//! plugin that drops into any arcly-http application.
//!
//! ## What it sets up
//! - **Structured JSON logs** via `tracing-subscriber` (respects `RUST_LOG`).
//! - **Prometheus metrics** at `GET /metrics`.
//! - **OTLP distributed traces** exported to the configured gRPC endpoint.
//! - **Health / readiness probes** at `GET /healthz` and `GET /readyz`.
//!
//! ## Usage
//! ```ignore
//! App::launch_with_plugins::<AppModule>(
//!     "0.0.0.0:3000",
//!     OpenApiInfo { /* ... */ },
//!     vec![
//!         Box::new(ArclyObservabilityPlugin {
//!             service_name:    "my-service",
//!             service_version: env!("CARGO_PKG_VERSION"),
//!             otlp_endpoint:   "http://localhost:4317",
//!         }),
//!     ],
//! ).await
//! ```

24use futures::future::BoxFuture;
25use metrics_exporter_prometheus::PrometheusHandle;
26
27use crate::core::engine::{FrozenDiContainer, HttpMethod};
28use crate::core::plugins::{ArclyPlugin, ArclyPluginContext, PluginError};
29
/// Drop-in observability plugin. Add to `App::launch_with_plugins` to get
/// structured logs, Prometheus metrics, OTLP traces, and health endpoints
/// with zero changes to handler code.
///
/// Every field is a `&'static str`, so the plugin can be built directly from
/// string literals and compile-time values such as `env!("CARGO_PKG_VERSION")`.
#[derive(Debug, Clone)]
pub struct ArclyObservabilityPlugin {
    /// Service name handed to the OTLP tracer configuration and echoed in the
    /// plugin's start-up log line.
    pub service_name: &'static str,
    /// Service version handed to the OTLP tracer configuration and echoed in
    /// the plugin's start-up log line.
    pub service_version: &'static str,
    /// gRPC endpoint of the OTLP collector.
    /// Example: `"http://localhost:4317"`
    pub otlp_endpoint: &'static str,
}
40
41impl ArclyPlugin for ArclyObservabilityPlugin {
42    fn name(&self) -> &'static str {
43        "ArclyObservabilityPlugin"
44    }
45
46    fn on_init<'a>(
47        &'a mut self,
48        ctx: &'a mut ArclyPluginContext,
49    ) -> BoxFuture<'a, Result<(), PluginError>> {
50        Box::pin(async move {
51            // ── 1. Structured JSON logging ─────────────────────────────────
52            use tracing_subscriber::{fmt, EnvFilter};
53            let _ = fmt()
54                .json()
55                .with_env_filter(
56                    EnvFilter::try_from_default_env()
57                        // Suppress opentelemetry_sdk export-failure spam when no
58                        // OTLP collector is running (common in dev / CI).
59                        // Override: RUST_LOG=info,opentelemetry_sdk=warn
60                        .unwrap_or_else(|_| EnvFilter::new("info,opentelemetry_sdk=off")),
61                )
62                .try_init(); // silently ignore "already set" errors
63
64            // ── 2. Prometheus metrics ──────────────────────────────────────
65            let handle: PrometheusHandle = crate::observability::metrics::init_metrics();
66
67            // Provide the handle into DI so the /metrics handler can render it.
68            ctx.provide::<PrometheusHandle>(handle.clone());
69
70            // Register GET /metrics route.
71            let metrics_handler = crate::observability::metrics::metrics_route_handler(handle);
72            ctx.add_route(HttpMethod::GET, "/metrics", metrics_handler);
73
74            // ── 3. OpenTelemetry OTLP traces ───────────────────────────────
75            crate::observability::otel::init_tracer(&crate::observability::otel::OtelConfig {
76                service_name: self.service_name,
77                service_version: self.service_version,
78                otlp_endpoint: self.otlp_endpoint,
79            });
80
81            // ── 4. Health / readiness endpoints ────────────────────────────
82            // Initialise the global registry now so it's ready for other plugins
83            // to register checks in their own on_init.
84            let _ = crate::observability::health::global();
85
86            ctx.add_route(
87                HttpMethod::GET,
88                "/healthz",
89                crate::observability::health::healthz_handler(),
90            );
91            ctx.add_route(
92                HttpMethod::GET,
93                "/readyz",
94                crate::observability::health::readyz_handler(),
95            );
96
97            // ── 5. Inject plugin routes into the OpenAPI spec ─────────────────
98            // Plugin routes are added to the axum router directly (not via the
99            // `inventory` system that macro-routes use), so `build_spec` does not
100            // see them. The `modify_openapi` mutator runs after `build_spec` and
101            // patches the paths object, making all three endpoints visible in the
102            // Swagger UI at /docs.
103            ctx.modify_openapi(|spec| {
104                let Some(paths) = spec
105                    .as_object_mut()
106                    .and_then(|m| m.get_mut("paths"))
107                    .and_then(|v| v.as_object_mut())
108                else { return };
109
110                // Shared schema for /healthz and /readyz responses.
111                let health_schema = serde_json::json!({
112                    "type": "object",
113                    "required": ["status", "checks", "uptime_secs"],
114                    "properties": {
115                        "status": {
116                            "type": "string",
117                            "enum": ["healthy", "degraded", "unhealthy"],
118                            "description": "Overall health of the service."
119                        },
120                        "checks": {
121                            "type": "object",
122                            "description": "Per-subsystem health results.",
123                            "additionalProperties": {
124                                "oneOf": [
125                                    { "type": "string", "enum": ["healthy"] },
126                                    { "type": "object", "required": ["degraded"],   "properties": { "degraded":   { "type": "string" } } },
127                                    { "type": "object", "required": ["unhealthy"], "properties": { "unhealthy": { "type": "string" } } }
128                                ]
129                            }
130                        },
131                        "uptime_secs": {
132                            "type": "integer",
133                            "format": "int64",
134                            "description": "Process uptime in seconds."
135                        }
136                    }
137                });
138
139                paths.insert("/healthz".to_string(), serde_json::json!({
140                    "get": {
141                        "summary": "Liveness probe",
142                        "description": "Returns 200 when the service is healthy or degraded, \
143                                        503 when any registered health check is Unhealthy.",
144                        "operationId": "healthz",
145                        "tags": ["observability"],
146                        "responses": {
147                            "200": {
148                                "description": "Healthy or degraded — service is alive.",
149                                "content": { "application/json": { "schema": health_schema } }
150                            },
151                            "503": {
152                                "description": "Unhealthy — one or more subsystems have failed."
153                            }
154                        }
155                    }
156                }));
157
158                paths.insert("/readyz".to_string(), serde_json::json!({
159                    "get": {
160                        "summary": "Readiness probe",
161                        "description": "Identical to /healthz. Use for Kubernetes `readinessProbe`; \
162                                        load-balancers should stop sending traffic when this returns 503.",
163                        "operationId": "readyz",
164                        "tags": ["observability"],
165                        "responses": {
166                            "200": { "description": "Ready to serve traffic." },
167                            "503": { "description": "Not ready — remove from load-balancer rotation." }
168                        }
169                    }
170                }));
171
172                paths.insert("/metrics".to_string(), serde_json::json!({
173                    "get": {
174                        "summary": "Prometheus metrics scrape",
175                        "description": "Returns Prometheus text-format (version 0.0.4) metrics. \
176                                        Counters: `http_requests_total`. \
177                                        Histogram: `http_request_duration_seconds`. \
178                                        Gauge: `http_requests_in_flight`.",
179                        "operationId": "metrics",
180                        "tags": ["observability"],
181                        "responses": {
182                            "200": {
183                                "description": "Prometheus text-format metric lines.",
184                                "content": {
185                                    "text/plain": {
186                                        "schema": { "type": "string" }
187                                    }
188                                }
189                            }
190                        }
191                    }
192                }));
193            });
194
195            tracing::info!(
196                service = self.service_name,
197                version = self.service_version,
198                otlp = self.otlp_endpoint,
199                "ArclyObservabilityPlugin initialised"
200            );
201
202            Ok(())
203        })
204    }
205
206    fn on_shutdown<'a>(
207        &'a self,
208        _container: &'static FrozenDiContainer,
209    ) -> BoxFuture<'a, Result<(), PluginError>> {
210        Box::pin(async move {
211            opentelemetry::global::shutdown_tracer_provider();
212            Ok(())
213        })
214    }
215}