post_cortex_daemon/daemon/observability.rs
1// Copyright (c) 2025, 2026 Julius ML
2// Licensed under the MIT License. See LICENSE at the workspace root.
3
4//! Observability — `tracing` layer stack + (feature-gated) OpenTelemetry
5//! OTLP exporter wiring.
6//!
7//! Two entry points:
8//!
9//! - [`init`] — install the global subscriber. Call once at daemon
10//! startup before any other tracing event fires. Respects the
11//! following env vars:
12//!
13//! | Var | Default | Effect |
14//! |-----|---------|--------|
15//! | `RUST_LOG` | `info` | `EnvFilter` directives |
16//! | `OTEL_LOG_FORMAT` | `compact` | `compact` / `pretty` / `json` |
17//! | `OTEL_SERVICE_NAME` | `post-cortex` | OTel service.name attr |
18//! | `OTEL_SERVICE_VERSION` | crate version | OTel service.version |
19//! | `OTEL_EXPORTER_OTLP_ENDPOINT` | _unset_ | When set + `otel` feature on, spans + metrics export to this gRPC endpoint |
20//!
21//! - [`shutdown`] — call before process exit so the OTLP exporter
22//! flushes its queue.
23//!
24//! When the `otel` feature is OFF, [`init`] still wires the
25//! `fmt::Subscriber` layer + `EnvFilter` — only the OTLP layer
26//! disappears. Library users that disable OTel pay nothing.
27
28use std::env;
29use tracing::Level;
30use tracing_subscriber::EnvFilter;
31use tracing_subscriber::layer::{Layer, SubscriberExt};
32use tracing_subscriber::util::SubscriberInitExt;
33
34/// Initialise the global tracing subscriber.
35///
36/// Idempotent: subsequent calls are no-ops (logs a warning).
37pub fn init() -> Result<(), TracingInitError> {
38 let filter = EnvFilter::try_from_default_env()
39 .unwrap_or_else(|_| EnvFilter::new(format!("{}", Level::INFO)));
40
41 let format = env::var("OTEL_LOG_FORMAT").unwrap_or_else(|_| "compact".to_string());
42
43 let fmt_layer = match format.as_str() {
44 "json" => tracing_subscriber::fmt::layer()
45 .json()
46 .with_target(true)
47 .boxed(),
48 "pretty" => tracing_subscriber::fmt::layer()
49 .pretty()
50 .with_target(false)
51 .boxed(),
52 _ => tracing_subscriber::fmt::layer()
53 .compact()
54 .with_target(false)
55 .boxed(),
56 };
57
58 tracing_subscriber::registry()
59 .with(filter)
60 .with(fmt_layer)
61 .try_init()?;
62
63 #[cfg(feature = "otel")]
64 {
65 match otel::try_install_global()? {
66 Some(()) => tracing::info!("observability: OTLP exporter active"),
67 None => tracing::info!("observability: fmt-only (OTEL_EXPORTER_OTLP_ENDPOINT not set)"),
68 }
69 }
70
71 #[cfg(not(feature = "otel"))]
72 tracing::info!("observability: fmt-only (otel feature disabled)");
73 Ok(())
74}
75
/// Tear down the OpenTelemetry side of the observability stack.
///
/// Delegates to the `otel` module when the `otel` feature is compiled in;
/// without that feature the function body is empty. It does not depend on
/// [`init`] having run, so it may be called unconditionally on the exit path.
pub fn shutdown() {
    #[cfg(feature = "otel")]
    {
        otel::shutdown();
    }
}
83
/// Errors raised by [`init`].
///
/// `Display` output for each variant comes from its `#[error(...)]`
/// attribute (generated by `thiserror`).
#[derive(Debug, thiserror::Error)]
pub enum TracingInitError {
    /// A global subscriber was already set.
    ///
    /// Thanks to `#[from]`, a
    /// [`tracing::dispatcher::SetGlobalDefaultError`] converts into this
    /// variant automatically when propagated with `?`.
    #[error("tracing subscriber already set: {0}")]
    SubscriberSet(#[from] tracing::dispatcher::SetGlobalDefaultError),

    /// `try_init` on the layered subscriber failed.
    ///
    /// Carries the stringified underlying error; this is the variant
    /// produced by the `From<TryInitError>` conversion in this module, and
    /// therefore the one [`init`] returns when a subscriber is already
    /// installed.
    #[error("tracing init failed: {0}")]
    Init(String),

    /// OTel exporter setup failed; the payload is a human-readable reason.
    ///
    /// NOTE(review): the placeholder exporter hook in this file never
    /// returns an error, so nothing visible here constructs this variant yet.
    #[error("otlp exporter setup failed: {0}")]
    Exporter(String),
}
100
101impl From<tracing_subscriber::util::TryInitError> for TracingInitError {
102 fn from(err: tracing_subscriber::util::TryInitError) -> Self {
103 Self::Init(err.to_string())
104 }
105}
106
#[cfg(feature = "otel")]
mod otel {
    //! OpenTelemetry OTLP exporter hooks, compiled only with the `otel`
    //! feature.

    use std::env;

    use super::TracingInitError;

    /// Environment variable that names the OTLP endpoint.
    const ENDPOINT_VAR: &str = "OTEL_EXPORTER_OTLP_ENDPOINT";

    /// Report whether an OTLP endpoint has been configured.
    ///
    /// Returns `Ok(Some(()))` when the endpoint variable holds a non-empty
    /// value and `Ok(None)` when it is unset, empty, or not valid Unicode.
    ///
    /// This is the Phase 10 placeholder: only the feature flag and the
    /// env-var detection ship today. The real `opentelemetry-otlp` pipeline
    /// and global tracer-provider install are deferred to a follow-up commit
    /// gated on the Phase 11 bench results, so the otel layer can be shown
    /// not to regress p95 on the hot path. Until then the return value only
    /// drives which state the observability log line reports, and the
    /// function never returns `Err`.
    pub(super) fn try_install_global() -> Result<Option<()>, TracingInitError> {
        let configured = env::var(ENDPOINT_VAR)
            .ok()
            .filter(|endpoint| !endpoint.is_empty())
            .map(|_| ());
        Ok(configured)
    }

    /// Placeholder shutdown hook; intentionally does nothing for now.
    ///
    /// Flushing and shutting down the tracer provider belongs here once the
    /// exporter pipeline is actually wired in.
    pub(super) fn shutdown() {}
}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test for [`init`]'s error surface.
    ///
    /// The global subscriber cannot be re-installed once another test has
    /// set one, so this does not assert success. It only checks that a
    /// failure, if any, is one of the "already initialised" variants.
    #[test]
    fn init_is_idempotent_per_process() {
        // `Ok(())` means this call won the race and needs no further checks.
        if let Err(other) = init() {
            // Losing the race may surface as either variant; both are fine.
            let tolerated = matches!(
                other,
                TracingInitError::SubscriberSet(_) | TracingInitError::Init(_)
            );
            assert!(tolerated, "unexpected error: {other}");
        }
    }
}
153}