Skip to main content

miden_node_utils/
logging.rs

1use std::str::FromStr;
2use std::sync::OnceLock;
3
4use opentelemetry::trace::TracerProvider as _;
5use opentelemetry::{KeyValue, Value};
6use opentelemetry_otlp::WithTonicConfig as _;
7use opentelemetry_sdk::Resource;
8use opentelemetry_sdk::propagation::TraceContextPropagator;
9use opentelemetry_sdk::resource::{EnvResourceDetector, TelemetryResourceDetector};
10use opentelemetry_sdk::trace::SdkTracerProvider;
11use tracing::subscriber::Subscriber;
12use tracing_opentelemetry::OpenTelemetryLayer;
13use tracing_subscriber::layer::{Filter, SubscriberExt};
14use tracing_subscriber::{Layer, Registry};
15
16use crate::tracing::OpenTelemetrySpanExt;
17
18/// Global tracer provider for flushing traces on panic.
19///
20/// This is necessary because the panic hook needs access to the tracer provider to flush
21/// pending spans before the program terminates.
22static TRACER_PROVIDER: OnceLock<SdkTracerProvider> = OnceLock::new();
23
24/// Default OpenTelemetry resource attributes for this process.
25#[derive(Clone, Default)]
26pub struct ResourceConfig {
27    service_name: Option<&'static str>,
28    attributes: Vec<(&'static str, &'static str)>,
29}
30
31impl ResourceConfig {
32    #[must_use]
33    pub fn with_name(mut self, service_name: &'static str) -> Self {
34        self.service_name = Some(service_name);
35        self
36    }
37
38    #[must_use]
39    pub fn with_attribute(mut self, key: &'static str, value: &'static str) -> Self {
40        self.attributes.push((key, value));
41        self
42    }
43}
44
45/// Configures [`setup_tracing`] to enable or disable the open-telemetry exporter.
46#[derive(Clone)]
47pub enum OpenTelemetry {
48    Enabled(ResourceConfig),
49    Disabled,
50}
51
52impl OpenTelemetry {
53    pub fn enabled() -> Self {
54        OpenTelemetry::Enabled(ResourceConfig::default())
55    }
56
57    pub fn from_env() -> Self {
58        if otlp_endpoint_configured() {
59            OpenTelemetry::enabled()
60        } else {
61            OpenTelemetry::Disabled
62        }
63    }
64
65    #[must_use]
66    pub fn with_name(self, service_name: &'static str) -> Self {
67        match self {
68            OpenTelemetry::Enabled(config) => {
69                OpenTelemetry::Enabled(config.with_name(service_name))
70            },
71            OpenTelemetry::Disabled => OpenTelemetry::Disabled,
72        }
73    }
74
75    #[must_use]
76    pub fn with_attribute(self, key: &'static str, value: &'static str) -> Self {
77        match self {
78            OpenTelemetry::Enabled(config) => {
79                OpenTelemetry::Enabled(config.with_attribute(key, value))
80            },
81            OpenTelemetry::Disabled => OpenTelemetry::Disabled,
82        }
83    }
84
85    fn is_enabled(&self) -> bool {
86        matches!(self, OpenTelemetry::Enabled(_))
87    }
88
89    fn resource_config(self) -> Option<ResourceConfig> {
90        match self {
91            OpenTelemetry::Enabled(config) => Some(config),
92            OpenTelemetry::Disabled => None,
93        }
94    }
95}
96
97/// A guard that shuts down the tracer provider when dropped. This ensures that the logs are flushed
98/// to the exporter before the program exits.
99pub struct OtelGuard {
100    tracer_provider: SdkTracerProvider,
101}
102
103impl Drop for OtelGuard {
104    fn drop(&mut self) {
105        if let Err(err) = self.tracer_provider.shutdown() {
106            eprintln!("{err:?}");
107        }
108    }
109}
110
111/// Initializes tracing to stdout and optionally an open-telemetry exporter.
112///
113/// Trace filtering defaults to `INFO` and can be configured using the conventional `RUST_LOG`
114/// environment variable.
115///
116/// The open-telemetry configuration is controlled via environment variables as defined in the
117/// [specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md#opentelemetry-protocol-exporter)
118///
119/// Registers a panic hook so that panic errors are reported to the open-telemetry exporter.
120///
121/// Returns an [`OtelGuard`] if open-telemetry is enabled, otherwise `None`. When this guard is
122/// dropped, the tracer provider is shutdown.
123pub fn setup_tracing(otel: OpenTelemetry) -> anyhow::Result<Option<OtelGuard>> {
124    if otel.is_enabled() {
125        opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());
126    }
127
128    // Note: open-telemetry requires a tokio-runtime, so this _must_ be lazily evaluated (aka not
129    // `then_some`) to avoid crashing sync callers (with OpenTelemetry::Disabled set). Examples of
130    // such callers are tests with logging enabled.
131    let tracer_provider = if otel.is_enabled() {
132        let provider = init_tracer_provider(
133            otel.resource_config()
134                .expect("resource config is set when OpenTelemetry is enabled"),
135        )?;
136
137        // Store the provider globally so the panic hook can flush it. SdkTracerProvider is
138        // internally reference-counted, so cloning is cheap.
139        TRACER_PROVIDER
140            .set(provider.clone())
141            .expect("setup_tracing should only be called once");
142
143        Some(provider)
144    } else {
145        None
146    };
147    let otel_layer = tracer_provider.as_ref().map(|provider| {
148        OpenTelemetryLayer::new(provider.tracer("tracing-otel-subscriber")).boxed()
149    });
150
151    let subscriber = Registry::default()
152        .with(stdout_layer().with_filter(env_or_default_filter()))
153        .with(otel_layer.with_filter(env_or_default_filter()));
154    tracing::subscriber::set_global_default(subscriber).map_err(Into::<anyhow::Error>::into)?;
155
156    // Register panic hook now that tracing is initialized. This chains with the default panic hook
157    // to preserve backtrace printing.
158    let default_hook = std::panic::take_hook();
159    std::panic::set_hook(Box::new(move |info| {
160        tracing::error!(panic = true, info = %info, "panic");
161
162        // Mark the current span as failed for OpenTelemetry.
163        let info_str = info.to_string();
164        let wrapped = anyhow::Error::msg(info_str);
165        tracing::Span::current().set_error(wrapped.as_ref());
166
167        // Flush traces before the program terminates. This ensures the panic trace is exported even
168        // though the OtelGuard won't be dropped.
169        if let Some(provider) = TRACER_PROVIDER.get() {
170            if let Err(err) = provider.force_flush() {
171                eprintln!("Failed to flush traces on panic: {err:?}");
172            }
173        }
174
175        // Call the default hook to print the backtrace.
176        default_hook(info);
177    }));
178
179    Ok(tracer_provider.map(|tracer_provider| OtelGuard { tracer_provider }))
180}
181
182fn init_tracer_provider(resource_config: ResourceConfig) -> anyhow::Result<SdkTracerProvider> {
183    let exporter = opentelemetry_otlp::SpanExporter::builder()
184        .with_tonic()
185        .with_tls_config(tonic::transport::ClientTlsConfig::new().with_enabled_roots())
186        .build()?;
187    let resource = resource(resource_config);
188
189    Ok(opentelemetry_sdk::trace::SdkTracerProvider::builder()
190        .with_resource(resource)
191        .with_batch_exporter(exporter)
192        .build())
193}
194
195fn resource(config: ResourceConfig) -> Resource {
196    let detected_resource = Resource::builder_empty()
197        .with_detector(Box::new(TelemetryResourceDetector))
198        .with_detector(Box::new(EnvResourceDetector::new()))
199        .build();
200
201    resource_from_detected(config, &detected_resource, otel_service_name_override())
202}
203
204fn resource_from_detected(
205    config: ResourceConfig,
206    detected_resource: &Resource,
207    service_name_override: Option<Value>,
208) -> Resource {
209    const SERVICE_NAME: &str = "service.name";
210    const SERVICE_NAMESPACE: &str = "service.namespace";
211
212    let mut attributes =
213        std::collections::BTreeMap::from([(SERVICE_NAMESPACE.to_string(), Value::from("miden"))]);
214
215    if let Some(service_name) = config.service_name {
216        attributes.insert(SERVICE_NAME.to_string(), Value::from(service_name));
217    }
218
219    for (key, value) in config.attributes {
220        attributes.insert(key.to_string(), Value::from(value));
221    }
222
223    // Environment resource attributes override defaults above, and OTEL_SERVICE_NAME overrides
224    // both.
225    for (key, value) in detected_resource {
226        attributes.insert(key.as_str().to_string(), value.clone());
227    }
228
229    if let Some(service_name) = service_name_override {
230        attributes.insert(SERVICE_NAME.to_string(), service_name);
231    }
232
233    Resource::builder_empty()
234        .with_attributes(attributes.into_iter().map(|(key, value)| KeyValue::new(key, value)))
235        .build()
236}
237
238fn otel_service_name_override() -> Option<Value> {
239    std::env::var("OTEL_SERVICE_NAME")
240        .ok()
241        .filter(|value| !value.is_empty())
242        .map(Value::from)
243}
244
245fn otlp_endpoint_configured() -> bool {
246    ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "OTEL_EXPORTER_OTLP_ENDPOINT"]
247        .into_iter()
248        .any(|key| std::env::var(key).is_ok_and(|value| !value.trim().is_empty()))
249}
250
251/// Initializes tracing to a test exporter.
252///
253/// Allows trace content to be inspected via the returned receiver.
254///
255/// All tests that use this function must be annotated with `#[serial(open_telemetry_tracing)]`.
256/// This forces serialization of all such tests. Otherwise, the tested spans could
257/// be interleaved during runtime. Also, the global exporter could be re-initialized in
258/// the middle of a concurrently running test.
259#[cfg(feature = "testing")]
260pub fn setup_test_tracing() -> anyhow::Result<(
261    tokio::sync::mpsc::UnboundedReceiver<opentelemetry_sdk::trace::SpanData>,
262    tokio::sync::mpsc::UnboundedReceiver<()>,
263)> {
264    let (exporter, rx_export, rx_shutdown) =
265        opentelemetry_sdk::testing::trace::new_tokio_test_exporter();
266
267    let tracer_provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
268        .with_batch_exporter(exporter)
269        .build();
270    let otel_layer =
271        OpenTelemetryLayer::new(tracer_provider.tracer("tracing-otel-subscriber")).boxed();
272    let subscriber = Registry::default()
273        .with(stdout_layer().with_filter(env_or_default_filter()))
274        .with(otel_layer.with_filter(env_or_default_filter()));
275    tracing::subscriber::set_global_default(subscriber)?;
276    Ok((rx_export, rx_shutdown))
277}
278
279#[cfg(not(feature = "tracing-forest"))]
280fn stdout_layer<S>() -> Box<dyn tracing_subscriber::Layer<S> + Send + Sync + 'static>
281where
282    S: Subscriber,
283    for<'a> S: tracing_subscriber::registry::LookupSpan<'a>,
284{
285    use tracing_subscriber::fmt::format::FmtSpan;
286
287    tracing_subscriber::fmt::layer()
288        .pretty()
289        .compact()
290        .with_level(true)
291        .with_file(true)
292        .with_line_number(true)
293        .with_target(true)
294        .with_span_events(FmtSpan::CLOSE)
295        .boxed()
296}
297
298#[cfg(feature = "tracing-forest")]
299fn stdout_layer<S>() -> Box<dyn tracing_subscriber::Layer<S> + Send + Sync + 'static>
300where
301    S: Subscriber,
302    for<'a> S: tracing_subscriber::registry::LookupSpan<'a>,
303{
304    tracing_forest::ForestLayer::default().boxed()
305}
306
307/// Creates a filter from the `RUST_LOG` env var with a default of `INFO` if unset.
308///
309/// # Panics
310///
311/// Panics if `RUST_LOG` fails to parse.
312fn env_or_default_filter<S>() -> Box<dyn Filter<S> + Send + Sync + 'static> {
313    use tracing::level_filters::LevelFilter;
314    use tracing_subscriber::EnvFilter;
315    use tracing_subscriber::filter::{FilterExt, Targets};
316
317    // `tracing` does not allow differentiating between invalid and missing env var so we manually
318    // do this instead. The alternative is to silently ignore parsing errors which I think is worse.
319    match std::env::var(EnvFilter::DEFAULT_ENV) {
320        Ok(rust_log) => FilterExt::boxed(
321            EnvFilter::from_str(&rust_log)
322                .expect("RUST_LOG should contain a valid filter configuration"),
323        ),
324        Err(std::env::VarError::NotUnicode(_)) => panic!("RUST_LOG contained non-unicode"),
325        Err(std::env::VarError::NotPresent) => {
326            // Default level is INFO, and additionally enable logs from axum extractor rejections.
327            FilterExt::boxed(
328                Targets::new()
329                    .with_default(LevelFilter::INFO)
330                    .with_target("axum::rejection", LevelFilter::TRACE),
331            )
332        },
333    }
334}
335
336#[cfg(test)]
337mod tests {
338    use opentelemetry::Key;
339
340    use super::*;
341
342    #[test]
343    fn resource_uses_configured_defaults() {
344        let detected_resource = Resource::builder_empty()
345            .with_attributes([KeyValue::new("telemetry.sdk.language", "rust")])
346            .build();
347
348        let resource = resource_from_detected(
349            ResourceConfig::default()
350                .with_name("node")
351                .with_attribute("miden.node.role", "sequencer"),
352            &detected_resource,
353            None,
354        );
355
356        assert_eq!(resource_value(&resource, "service.name"), Some(Value::from("node")),);
357        assert_eq!(resource_value(&resource, "service.namespace"), Some(Value::from("miden")),);
358        assert_eq!(resource_value(&resource, "miden.node.role"), Some(Value::from("sequencer")),);
359        assert_eq!(resource_value(&resource, "telemetry.sdk.language"), Some(Value::from("rust")),);
360    }
361
362    #[test]
363    fn resource_prefers_detected_attributes_over_configured_defaults() {
364        let detected_resource = Resource::builder_empty()
365            .with_attributes([
366                KeyValue::new("service.name", "custom-node"),
367                KeyValue::new("service.namespace", "custom-namespace"),
368                KeyValue::new("miden.node.role", "custom-role"),
369            ])
370            .build();
371
372        let resource = resource_from_detected(
373            ResourceConfig::default()
374                .with_name("node")
375                .with_attribute("miden.node.role", "sequencer"),
376            &detected_resource,
377            None,
378        );
379
380        assert_eq!(resource_value(&resource, "service.name"), Some(Value::from("custom-node")),);
381        assert_eq!(
382            resource_value(&resource, "service.namespace"),
383            Some(Value::from("custom-namespace")),
384        );
385        assert_eq!(resource_value(&resource, "miden.node.role"), Some(Value::from("custom-role")),);
386    }
387
388    #[test]
389    fn resource_prefers_explicit_service_name_override() {
390        let detected_resource = Resource::builder_empty()
391            .with_attributes([KeyValue::new("service.name", "resource-attribute-node")])
392            .build();
393
394        let resource = resource_from_detected(
395            ResourceConfig::default().with_name("node"),
396            &detected_resource,
397            Some(Value::from("service-env-node")),
398        );
399
400        assert_eq!(
401            resource_value(&resource, "service.name"),
402            Some(Value::from("service-env-node")),
403        );
404    }
405
406    fn resource_value(resource: &Resource, key: &'static str) -> Option<Value> {
407        resource.get(&Key::from_static_str(key))
408    }
409}