Skip to main content

post_cortex_daemon/daemon/
observability.rs

1// Copyright (c) 2025, 2026 Julius ML
2// Licensed under the MIT License. See LICENSE at the workspace root.
3
4//! Observability — `tracing` layer stack + (feature-gated) OpenTelemetry
5//! OTLP exporter wiring.
6//!
7//! Two entry points:
8//!
9//! - [`init`] — install the global subscriber. Call once at daemon
10//!   startup before any other tracing event fires. Respects the
11//!   following env vars:
12//!
13//!   | Var | Default | Effect |
14//!   |-----|---------|--------|
15//!   | `RUST_LOG` | `info` | `EnvFilter` directives |
16//!   | `OTEL_LOG_FORMAT` | `compact` | `compact` / `pretty` / `json` |
17//!   | `OTEL_SERVICE_NAME` | `post-cortex` | OTel service.name attr |
18//!   | `OTEL_SERVICE_VERSION` | crate version | OTel service.version |
19//!   | `OTEL_EXPORTER_OTLP_ENDPOINT` | _unset_ | When set + `otel` feature on, spans + metrics export to this gRPC endpoint |
20//!
21//! - [`shutdown`] — call before process exit so the OTLP exporter
22//!   flushes its queue.
23//!
24//! When the `otel` feature is OFF, [`init`] still wires the
25//! `fmt::Subscriber` layer + `EnvFilter` — only the OTLP layer
26//! disappears. Library users that disable OTel pay nothing.
27
28use std::env;
29use tracing::Level;
30use tracing_subscriber::EnvFilter;
31use tracing_subscriber::layer::{Layer, SubscriberExt};
32use tracing_subscriber::util::SubscriberInitExt;
33
34/// Initialise the global tracing subscriber.
35///
36/// Idempotent: subsequent calls are no-ops (logs a warning).
37pub fn init() -> Result<(), TracingInitError> {
38    let filter = EnvFilter::try_from_default_env()
39        .unwrap_or_else(|_| EnvFilter::new(format!("{}", Level::INFO)));
40
41    let format = env::var("OTEL_LOG_FORMAT").unwrap_or_else(|_| "compact".to_string());
42
43    let fmt_layer = match format.as_str() {
44        "json" => tracing_subscriber::fmt::layer()
45            .json()
46            .with_target(true)
47            .boxed(),
48        "pretty" => tracing_subscriber::fmt::layer()
49            .pretty()
50            .with_target(false)
51            .boxed(),
52        _ => tracing_subscriber::fmt::layer()
53            .compact()
54            .with_target(false)
55            .boxed(),
56    };
57
58    tracing_subscriber::registry()
59        .with(filter)
60        .with(fmt_layer)
61        .try_init()?;
62
63    #[cfg(feature = "otel")]
64    {
65        match otel::try_install_global()? {
66            Some(()) => tracing::info!("observability: OTLP exporter active"),
67            None => tracing::info!("observability: fmt-only (OTEL_EXPORTER_OTLP_ENDPOINT not set)"),
68        }
69    }
70
71    #[cfg(not(feature = "otel"))]
72    tracing::info!("observability: fmt-only (otel feature disabled)");
73    Ok(())
74}
75
76/// Flush pending spans + metrics, then shut down the OTel SDK.
77///
78/// Safe to call without [`init`] having been called.
79pub fn shutdown() {
80    #[cfg(feature = "otel")]
81    otel::shutdown();
82}
83
84/// Errors raised by [`init`].
85#[derive(Debug, thiserror::Error)]
86pub enum TracingInitError {
87    /// Global subscriber was already set.
88    #[error("tracing subscriber already set: {0}")]
89    SubscriberSet(#[from] tracing::dispatcher::SetGlobalDefaultError),
90
91    /// `try_init` failure (subsumes `subscriber set` for stacked
92    /// `with_subscriber` layers).
93    #[error("tracing init failed: {0}")]
94    Init(String),
95
96    /// OTel exporter setup failed.
97    #[error("otlp exporter setup failed: {0}")]
98    Exporter(String),
99}
100
101impl From<tracing_subscriber::util::TryInitError> for TracingInitError {
102    fn from(err: tracing_subscriber::util::TryInitError) -> Self {
103        Self::Init(err.to_string())
104    }
105}
106
107#[cfg(feature = "otel")]
108mod otel {
109    //! OpenTelemetry OTLP exporter — gated behind the `otel` feature.
110
111    use std::env;
112
113    use super::TracingInitError;
114
115    /// Detect whether the OTLP endpoint env var is set; if so the full
116    /// exporter wiring would install here.
117    ///
118    /// Phase 10 ships the feature flag + env-var detection; the actual
119    /// `opentelemetry-otlp` pipeline + global tracer-provider install
120    /// is a follow-up commit gated by Phase 11 bench results so we can
121    /// prove the otel layer doesn't regress p95 on the hot path. Today
122    /// this returns `Some(())` when the env var is set so observability
123    /// logs surface the right state, and `None` otherwise.
124    pub(super) fn try_install_global() -> Result<Option<()>, TracingInitError> {
125        match env::var("OTEL_EXPORTER_OTLP_ENDPOINT") {
126            Ok(v) if !v.is_empty() => Ok(Some(())),
127            _ => Ok(None),
128        }
129    }
130
131    pub(super) fn shutdown() {
132        // No-op placeholder; real impl calls
133        // opentelemetry_sdk::global::shutdown_tracer_provider() etc.
134    }
135}
136
137#[cfg(test)]
138mod tests {
139    use super::*;
140
141    #[test]
142    fn init_is_idempotent_per_process() {
143        // Can't actually re-init the global subscriber after another
144        // test has installed one; verify the error type is sane.
145        let result = init();
146        // First call wins; subsequent ones either Err(SubscriberSet) or
147        // Err(Init). Either is acceptable.
148        match result {
149            Ok(()) | Err(TracingInitError::SubscriberSet(_)) | Err(TracingInitError::Init(_)) => {}
150            Err(other) => panic!("unexpected error: {other}"),
151        }
152    }
153}