Skip to main content

klauthed_observability/
lib.rs

1#![deny(unsafe_code)]
2#![deny(missing_docs)]
3#![cfg_attr(
4    not(test),
5    deny(clippy::unwrap_used, clippy::expect_used, clippy::panic, clippy::indexing_slicing)
6)]
7
8//! Observability for klauthed services: structured logging/tracing, Prometheus
9//! metrics, and OpenTelemetry trace export — from one [`TelemetryConfig`].
10//!
11//! [`init`] installs the global tracing subscriber (and, per feature + config,
12//! the metrics recorder and the OTLP trace pipeline) and returns a [`Telemetry`]
13//! handle. Keep it alive for the program's lifetime; dropping it flushes
14//! OpenTelemetry spans.
15//!
16//! ```no_run
17//! use klauthed_observability::{init, TelemetryConfig};
18//! use klauthed_core::config::Profile;
19//!
20//! let config = TelemetryConfig::for_profile(&Profile::detect(), "billing-api");
21//! let _telemetry = init(&config).expect("telemetry init");
22//! tracing::info!("service starting");
23//! ```
24//!
25//! Features:
26//! * `metrics` — Prometheus recorder + a `/metrics` render handle.
27//! * `otel` — OTLP trace export wired into the tracing subscriber.
28
29mod config;
30mod error;
31mod logging;
32mod trace;
33
34#[cfg(feature = "metrics")]
35pub mod metrics;
36
37#[cfg(feature = "otel")]
38mod otel;
39
40pub use config::{LogConfig, LogFormat, MetricsConfig, OtelConfig, TelemetryConfig};
41pub use error::ObservabilityError;
42#[cfg(feature = "otel")]
43pub use trace::propagation;
44pub use trace::{RecordContext, request_span};
45
46/// Common imports for telemetry setup: `use klauthed_observability::prelude::*;`.
47pub mod prelude {
48    pub use crate::{
49        LogConfig, LogFormat, MetricsConfig, ObservabilityError, OtelConfig, RecordContext,
50        TelemetryConfig, init, request_span,
51    };
52}
53
54use tracing_subscriber::Registry;
55use tracing_subscriber::prelude::*;
56
57/// A live telemetry installation. Hold it for the program's lifetime.
58pub struct Telemetry {
59    // Dropped last; flushes OpenTelemetry on shutdown.
60    _guard: Guard,
61    #[cfg(feature = "metrics")]
62    metrics: Option<metrics::MetricsHandle>,
63}
64
65impl Telemetry {
66    /// The Prometheus render handle, if metrics were installed.
67    #[cfg(feature = "metrics")]
68    pub fn metrics(&self) -> Option<&metrics::MetricsHandle> {
69        self.metrics.as_ref()
70    }
71}
72
73/// Initialize telemetry from `config`, installing the global subscriber and,
74/// per features and config, the metrics recorder and OTLP trace pipeline.
75pub fn init(config: &TelemetryConfig) -> Result<Telemetry, ObservabilityError> {
76    #[cfg(feature = "metrics")]
77    let metrics = if config.metrics.enabled { Some(metrics::install()?) } else { None };
78
79    // `mut` is only used when the otel layer is pushed below.
80    #[cfg_attr(not(feature = "otel"), allow(unused_mut))]
81    let mut layers: Vec<logging::BoxedLayer> = vec![logging::fmt_layer(&config.log)];
82
83    #[cfg(feature = "otel")]
84    let tracer_provider = if config.otel.enabled {
85        let (layer, provider) = otel::trace_layer(config)?;
86        layers.push(layer);
87        Some(provider)
88    } else {
89        None
90    };
91
92    // Layers (typed over `Registry`) go on first; the global level filter is
93    // applied outermost so it gates the whole stack.
94    Registry::default()
95        .with(layers)
96        .with(logging::env_filter(&config.log))
97        .try_init()
98        .map_err(|e| ObservabilityError::Subscriber(e.to_string()))?;
99
100    Ok(Telemetry {
101        _guard: Guard {
102            #[cfg(feature = "otel")]
103            tracer_provider,
104        },
105        #[cfg(feature = "metrics")]
106        metrics,
107    })
108}
109
/// Owns whatever `init` created that has to stay alive after it returns and
/// needs explicit teardown on shutdown.
struct Guard {
    #[cfg(feature = "otel")]
    tracer_provider: Option<opentelemetry_sdk::trace::SdkTracerProvider>,
}

impl Drop for Guard {
    /// Shuts down the tracer provider, if one is held, so pending spans get a
    /// chance to be flushed.
    fn drop(&mut self) {
        #[cfg(feature = "otel")]
        {
            let Some(provider) = self.tracer_provider.take() else {
                return;
            };
            // Best-effort: there is nothing useful to do with a shutdown
            // error while dropping, so the result is deliberately ignored.
            let _ = provider.shutdown();
        }
    }
}
124}