Skip to main content

post_cortex_daemon/daemon/
observability.rs

1// Copyright (c) 2025, 2026 Julius ML
2// Licensed under the MIT License. See LICENSE at the workspace root.
3
4//! Observability — `tracing` layer stack + (feature-gated) OpenTelemetry
5//! OTLP exporter wiring.
6//!
7//! Two entry points:
8//!
9//! - [`init`] — install the global subscriber. Call once at daemon
10//!   startup before any other tracing event fires. Respects the
11//!   following env vars:
12//!
13//!   | Var | Default | Effect |
14//!   |-----|---------|--------|
15//!   | `RUST_LOG` | `info` | `EnvFilter` directives |
16//!   | `OTEL_LOG_FORMAT` | `compact` | `compact` / `pretty` / `json` |
17//!   | `OTEL_SERVICE_NAME` | `post-cortex` | OTel service.name attr |
18//!   | `OTEL_SERVICE_VERSION` | crate version | OTel service.version |
19//!   | `OTEL_EXPORTER_OTLP_ENDPOINT` | _unset_ | When set + `otel` feature on, spans + metrics export to this gRPC endpoint |
20//!
21//! - [`shutdown`] — call before process exit so the OTLP exporter
22//!   flushes its queue.
23//!
//! When the `otel` feature is OFF, [`init`] still wires the
//! `fmt::Layer` + `EnvFilter` — only the OTLP layer
//! disappears. Library users that disable OTel pay nothing.
27
28use std::env;
29use tracing::Level;
30use tracing_subscriber::layer::{Layer, SubscriberExt};
31use tracing_subscriber::util::SubscriberInitExt;
32use tracing_subscriber::EnvFilter;
33
34/// Initialise the global tracing subscriber.
35///
36/// Idempotent: subsequent calls are no-ops (logs a warning).
37pub fn init() -> Result<(), TracingInitError> {
38    let filter = EnvFilter::try_from_default_env()
39        .unwrap_or_else(|_| EnvFilter::new(format!("{}", Level::INFO)));
40
41    let format = env::var("OTEL_LOG_FORMAT").unwrap_or_else(|_| "compact".to_string());
42
43    let fmt_layer = match format.as_str() {
44        "json" => tracing_subscriber::fmt::layer()
45            .json()
46            .with_target(true)
47            .boxed(),
48        "pretty" => tracing_subscriber::fmt::layer()
49            .pretty()
50            .with_target(false)
51            .boxed(),
52        _ => tracing_subscriber::fmt::layer()
53            .compact()
54            .with_target(false)
55            .boxed(),
56    };
57
58    tracing_subscriber::registry()
59        .with(filter)
60        .with(fmt_layer)
61        .try_init()?;
62
63    #[cfg(feature = "otel")]
64    {
65        match otel::try_install_global()? {
66            Some(()) => tracing::info!("observability: OTLP exporter active"),
67            None => tracing::info!(
68                "observability: fmt-only (OTEL_EXPORTER_OTLP_ENDPOINT not set)"
69            ),
70        }
71    }
72
73    #[cfg(not(feature = "otel"))]
74    tracing::info!("observability: fmt-only (otel feature disabled)");
75    Ok(())
76}
77
/// Tear down the observability stack before the process exits.
///
/// When the crate is built with the `otel` feature this delegates to
/// `otel::shutdown`, the hook intended to flush pending spans + metrics
/// and stop the OTel SDK. Without the feature the body compiles to
/// nothing.
///
/// Nothing here reads state created by [`init`], so it may be called
/// whether or not [`init`] has run.
pub fn shutdown() {
    #[cfg(feature = "otel")]
    {
        otel::shutdown();
    }
}
85
86/// Errors raised by [`init`].
87#[derive(Debug, thiserror::Error)]
88pub enum TracingInitError {
89    /// Global subscriber was already set.
90    #[error("tracing subscriber already set: {0}")]
91    SubscriberSet(#[from] tracing::dispatcher::SetGlobalDefaultError),
92
93    /// `try_init` failure (subsumes `subscriber set` for stacked
94    /// `with_subscriber` layers).
95    #[error("tracing init failed: {0}")]
96    Init(String),
97
98    /// OTel exporter setup failed.
99    #[error("otlp exporter setup failed: {0}")]
100    Exporter(String),
101}
102
103impl From<tracing_subscriber::util::TryInitError> for TracingInitError {
104    fn from(err: tracing_subscriber::util::TryInitError) -> Self {
105        Self::Init(err.to_string())
106    }
107}
108
#[cfg(feature = "otel")]
mod otel {
    //! Home of the OpenTelemetry OTLP exporter wiring; compiled only
    //! when the `otel` feature is enabled.

    use std::env;

    use super::TracingInitError;

    /// Report whether an OTLP endpoint has been configured.
    ///
    /// Returns `Ok(Some(()))` when `OTEL_EXPORTER_OTLP_ENDPOINT` is set
    /// to a non-empty value and `Ok(None)` in every other case (unset,
    /// empty, or not readable as Unicode). It never returns `Err` today.
    ///
    /// Phase 10 ships only the feature flag + this env-var detection.
    /// The real `opentelemetry-otlp` pipeline and global
    /// tracer-provider install are a follow-up commit, gated on Phase 11
    /// bench results showing the otel layer does not regress p95 on the
    /// hot path. Until then the return value exists so the observability
    /// logs surface the right state.
    pub(super) fn try_install_global() -> Result<Option<()>, TracingInitError> {
        let endpoint_marker = env::var("OTEL_EXPORTER_OTLP_ENDPOINT")
            .ok()
            .filter(|endpoint| !endpoint.is_empty())
            .map(|_| ());

        Ok(endpoint_marker)
    }

    /// Shutdown hook for the OTel SDK; currently does nothing.
    pub(super) fn shutdown() {
        // Placeholder: the real implementation is expected to call
        // opentelemetry_sdk::global::shutdown_tracer_provider() etc.
    }
}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Calls [`init`] once and checks that the outcome is one of the
    /// expected ones.
    ///
    /// The global subscriber cannot be re-installed once another test in
    /// the same process has set it, so this does not assert success: the
    /// first caller gets `Ok(())`, later callers get `SubscriberSet` or
    /// `Init`. Only any other error variant fails the test.
    #[test]
    fn init_is_idempotent_per_process() {
        match init() {
            // This call won the race and installed the subscriber.
            Ok(()) => {}
            // Somebody else installed one first; both variants are fine.
            Err(TracingInitError::SubscriberSet(_) | TracingInitError::Init(_)) => {}
            Err(other) => panic!("unexpected error: {other}"),
        }
    }
}