Skip to main content

miden_node_utils/
logging.rs

1use std::str::FromStr;
2use std::sync::OnceLock;
3
4use opentelemetry::trace::TracerProvider as _;
5use opentelemetry::{KeyValue, Value};
6use opentelemetry_otlp::WithTonicConfig as _;
7use opentelemetry_sdk::Resource;
8use opentelemetry_sdk::propagation::TraceContextPropagator;
9use opentelemetry_sdk::resource::{EnvResourceDetector, TelemetryResourceDetector};
10use opentelemetry_sdk::trace::SdkTracerProvider;
11use tracing::subscriber::Subscriber;
12use tracing_opentelemetry::OpenTelemetryLayer;
13use tracing_subscriber::layer::{Filter, SubscriberExt};
14use tracing_subscriber::{EnvFilter, Layer, Registry};
15
16use crate::tracing::ErrorSpanExt;
17
/// Global tracer provider for flushing traces on panic.
///
/// This is necessary because the panic hook needs access to the tracer provider to flush
/// pending spans before the program terminates.
///
/// Populated by [`setup_tracing_with_config`] when OpenTelemetry is enabled and left empty
/// otherwise. Being a [`OnceLock`], it can be set at most once per process.
static TRACER_PROVIDER: OnceLock<SdkTracerProvider> = OnceLock::new();
23
/// Default OpenTelemetry resource attributes for this process.
///
/// Values are assembled with the chaining methods, for example
/// `ResourceConfig::default().with_name("node").with_attribute("role", "sequencer")`.
#[derive(Clone, Debug, Default)]
pub struct ResourceConfig {
    /// Default service name; `None` means no default has been configured.
    service_name: Option<&'static str>,
    /// Additional default attributes as `(key, value)` pairs, in the order they were added.
    attributes: Vec<(&'static str, &'static str)>,
}

impl ResourceConfig {
    /// Sets the default service name, replacing any name configured earlier.
    #[must_use]
    pub fn with_name(mut self, service_name: &'static str) -> Self {
        self.service_name = Some(service_name);
        self
    }

    /// Appends a default resource attribute.
    ///
    /// Pairs are stored in call order and are not de-duplicated here.
    #[must_use]
    pub fn with_attribute(mut self, key: &'static str, value: &'static str) -> Self {
        self.attributes.push((key, value));
        self
    }
}
44
45/// Configures [`setup_tracing`] to enable or disable the open-telemetry exporter.
46#[derive(Clone)]
47pub enum OpenTelemetry {
48    Enabled(ResourceConfig),
49    Disabled,
50}
51
52impl OpenTelemetry {
53    pub fn enabled() -> Self {
54        OpenTelemetry::Enabled(ResourceConfig::default())
55    }
56
57    pub fn from_env() -> Self {
58        if otlp_endpoint_configured() {
59            OpenTelemetry::enabled()
60        } else {
61            OpenTelemetry::Disabled
62        }
63    }
64
65    #[must_use]
66    pub fn with_name(self, service_name: &'static str) -> Self {
67        match self {
68            OpenTelemetry::Enabled(config) => {
69                OpenTelemetry::Enabled(config.with_name(service_name))
70            },
71            OpenTelemetry::Disabled => OpenTelemetry::Disabled,
72        }
73    }
74
75    #[must_use]
76    pub fn with_attribute(self, key: &'static str, value: &'static str) -> Self {
77        match self {
78            OpenTelemetry::Enabled(config) => {
79                OpenTelemetry::Enabled(config.with_attribute(key, value))
80            },
81            OpenTelemetry::Disabled => OpenTelemetry::Disabled,
82        }
83    }
84
85    fn is_enabled(&self) -> bool {
86        matches!(self, OpenTelemetry::Enabled(_))
87    }
88
89    fn resource_config(self) -> Option<ResourceConfig> {
90        match self {
91            OpenTelemetry::Enabled(config) => Some(config),
92            OpenTelemetry::Disabled => None,
93        }
94    }
95}
96
97/// Tracing subscriber configuration.
98#[derive(Clone)]
99pub struct TracingConfig {
100    pub open_telemetry: OpenTelemetry,
101    pub stdout_filter: String,
102    pub otel_filter: String,
103}
104
105impl TracingConfig {
106    #[must_use]
107    pub fn from_env(open_telemetry: OpenTelemetry) -> Self {
108        Self {
109            open_telemetry,
110            stdout_filter: filter_env_or_default("MIDEN_STDOUT_FILTER", "info,user=debug"),
111            otel_filter: filter_env_or_default("MIDEN_OTEL_FILTER", "info,axum::rejection=trace"),
112        }
113    }
114}
115
116fn filter_env_or_default(var: &str, default: &str) -> String {
117    std::env::var(var)
118        .ok()
119        .filter(|value| !value.trim().is_empty())
120        .or_else(|| {
121            std::env::var(EnvFilter::DEFAULT_ENV)
122                .ok()
123                .filter(|value| !value.trim().is_empty())
124        })
125        .unwrap_or_else(|| default.to_owned())
126}
127
128fn filter_from_string<S>(
129    filter: &str,
130) -> anyhow::Result<Box<dyn Filter<S> + Send + Sync + 'static>> {
131    use tracing_subscriber::filter::FilterExt;
132
133    Ok(FilterExt::boxed(EnvFilter::from_str(filter)?))
134}
135
136/// A guard that shuts down the tracer provider when dropped. This ensures that the logs are flushed
137/// to the exporter before the program exits.
138pub struct OtelGuard {
139    tracer_provider: SdkTracerProvider,
140}
141
142impl Drop for OtelGuard {
143    fn drop(&mut self) {
144        if let Err(err) = self.tracer_provider.shutdown() {
145            eprintln!("{err:?}");
146        }
147    }
148}
149
150/// Initializes tracing to stdout and optionally an open-telemetry exporter.
151///
152/// Stdout trace filtering is configured with `MIDEN_STDOUT_FILTER`, then `RUST_LOG`, then `info,user=debug`.
153/// OpenTelemetry export filtering is configured with `MIDEN_OTEL_FILTER`, then `RUST_LOG`, then
154/// `info,axum::rejection=trace`.
155///
156/// The open-telemetry configuration is controlled via environment variables as defined in the
157/// [specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md#opentelemetry-protocol-exporter)
158///
159/// Registers a panic hook so that panic errors are reported to the open-telemetry exporter.
160///
161/// Returns an [`OtelGuard`] if open-telemetry is enabled, otherwise `None`. When this guard is
162/// dropped, the tracer provider is shutdown.
163pub fn setup_tracing(otel: OpenTelemetry) -> anyhow::Result<Option<OtelGuard>> {
164    setup_tracing_with_config(TracingConfig::from_env(otel))
165}
166
167/// Initializes tracing from explicit stdout and OpenTelemetry filter configuration.
168///
169/// Returns an [`OtelGuard`] if open-telemetry is enabled, otherwise `None`. When this guard is
170/// dropped, the tracer provider is shutdown.
171pub fn setup_tracing_with_config(config: TracingConfig) -> anyhow::Result<Option<OtelGuard>> {
172    let TracingConfig {
173        open_telemetry: otel,
174        stdout_filter,
175        otel_filter,
176    } = config;
177
178    if otel.is_enabled() {
179        opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());
180    }
181
182    // Note: open-telemetry requires a tokio-runtime, so this _must_ be lazily evaluated (aka not
183    // `then_some`) to avoid crashing sync callers (with OpenTelemetry::Disabled set). Examples of
184    // such callers are tests with logging enabled.
185    let tracer_provider = if otel.is_enabled() {
186        let provider = init_tracer_provider(
187            otel.resource_config()
188                .expect("resource config is set when OpenTelemetry is enabled"),
189        )?;
190
191        // Store the provider globally so the panic hook can flush it. SdkTracerProvider is
192        // internally reference-counted, so cloning is cheap.
193        TRACER_PROVIDER
194            .set(provider.clone())
195            .expect("setup_tracing should only be called once");
196
197        Some(provider)
198    } else {
199        None
200    };
201    let otel_layer = tracer_provider.as_ref().map(|provider| {
202        OpenTelemetryLayer::new(provider.tracer("tracing-otel-subscriber")).boxed()
203    });
204
205    let subscriber = Registry::default()
206        .with(stdout_layer().with_filter(filter_from_string(&stdout_filter)?))
207        .with(otel_layer.with_filter(filter_from_string(&otel_filter)?));
208    tracing::subscriber::set_global_default(subscriber).map_err(Into::<anyhow::Error>::into)?;
209
210    // Register panic hook now that tracing is initialized. This chains with the default panic hook
211    // to preserve backtrace printing.
212    let default_hook = std::panic::take_hook();
213    std::panic::set_hook(Box::new(move |info| {
214        tracing::error!(panic = true, info = %info, "panic");
215
216        // Mark the current span as failed for OpenTelemetry.
217        let info_str = info.to_string();
218        let wrapped = anyhow::Error::msg(info_str);
219        tracing::Span::current().set_error(wrapped.as_ref());
220
221        // Flush traces before the program terminates. This ensures the panic trace is exported even
222        // though the OtelGuard won't be dropped.
223        if let Some(provider) = TRACER_PROVIDER.get() {
224            if let Err(err) = provider.force_flush() {
225                eprintln!("Failed to flush traces on panic: {err:?}");
226            }
227        }
228
229        // Call the default hook to print the backtrace.
230        default_hook(info);
231    }));
232
233    Ok(tracer_provider.map(|tracer_provider| OtelGuard { tracer_provider }))
234}
235
236fn init_tracer_provider(resource_config: ResourceConfig) -> anyhow::Result<SdkTracerProvider> {
237    let exporter = opentelemetry_otlp::SpanExporter::builder()
238        .with_tonic()
239        .with_tls_config(tonic::transport::ClientTlsConfig::new().with_enabled_roots())
240        .build()?;
241    let resource = resource(resource_config);
242
243    Ok(opentelemetry_sdk::trace::SdkTracerProvider::builder()
244        .with_resource(resource)
245        .with_batch_exporter(exporter)
246        .build())
247}
248
249fn resource(config: ResourceConfig) -> Resource {
250    let detected_resource = Resource::builder_empty()
251        .with_detector(Box::new(TelemetryResourceDetector))
252        .with_detector(Box::new(EnvResourceDetector::new()))
253        .build();
254
255    resource_from_detected(config, &detected_resource, otel_service_name_override())
256}
257
258fn resource_from_detected(
259    config: ResourceConfig,
260    detected_resource: &Resource,
261    service_name_override: Option<Value>,
262) -> Resource {
263    const SERVICE_NAME: &str = "service.name";
264    const SERVICE_NAMESPACE: &str = "service.namespace";
265
266    let mut attributes =
267        std::collections::BTreeMap::from([(SERVICE_NAMESPACE.to_string(), Value::from("miden"))]);
268
269    if let Some(service_name) = config.service_name {
270        attributes.insert(SERVICE_NAME.to_string(), Value::from(service_name));
271    }
272
273    for (key, value) in config.attributes {
274        attributes.insert(key.to_string(), Value::from(value));
275    }
276
277    // Environment resource attributes override defaults above, and OTEL_SERVICE_NAME overrides
278    // both.
279    for (key, value) in detected_resource {
280        attributes.insert(key.as_str().to_string(), value.clone());
281    }
282
283    if let Some(service_name) = service_name_override {
284        attributes.insert(SERVICE_NAME.to_string(), service_name);
285    }
286
287    Resource::builder_empty()
288        .with_attributes(attributes.into_iter().map(|(key, value)| KeyValue::new(key, value)))
289        .build()
290}
291
292fn otel_service_name_override() -> Option<Value> {
293    std::env::var("OTEL_SERVICE_NAME")
294        .ok()
295        .filter(|value| !value.is_empty())
296        .map(Value::from)
297}
298
/// Reports whether at least one OTLP endpoint variable holds a non-blank value.
///
/// A variable that cannot be read, or that is blank after trimming, does not count.
fn otlp_endpoint_configured() -> bool {
    const ENDPOINT_VARS: [&str; 2] =
        ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "OTEL_EXPORTER_OTLP_ENDPOINT"];

    ENDPOINT_VARS.iter().any(|name| match std::env::var(name) {
        Ok(endpoint) => !endpoint.trim().is_empty(),
        Err(_) => false,
    })
}
304
/// Sets up tracing against a test exporter.
///
/// The returned receivers expose what the exporter sees: the first yields exported span data,
/// the second is the exporter's shutdown channel.
///
/// All tests that use this function must be annotated with `#[serial(open_telemetry_tracing)]`.
/// This forces serialization of all such tests. Otherwise, the tested spans could
/// be interleaved during runtime. Also, the global exporter could be re-initialized in
/// the middle of a concurrently running test.
#[cfg(feature = "testing")]
pub fn setup_test_tracing() -> anyhow::Result<(
    tokio::sync::mpsc::UnboundedReceiver<opentelemetry_sdk::trace::SpanData>,
    tokio::sync::mpsc::UnboundedReceiver<()>,
)> {
    let (span_exporter, rx_export, rx_shutdown) =
        opentelemetry_sdk::testing::trace::new_tokio_test_exporter();
    let provider = SdkTracerProvider::builder()
        .with_batch_exporter(span_exporter)
        .build();
    let tracer = provider.tracer("tracing-otel-subscriber");
    let otel_layer = OpenTelemetryLayer::new(tracer).boxed();

    let subscriber = Registry::default()
        .with(stdout_layer().with_filter(filter_from_string("debug")?))
        .with(otel_layer.with_filter(filter_from_string("info,axum::rejection=trace")?));
    tracing::subscriber::set_global_default(subscriber)?;

    Ok((rx_export, rx_shutdown))
}
332
333#[cfg(not(feature = "tracing-forest"))]
334fn stdout_layer<S>() -> Box<dyn tracing_subscriber::Layer<S> + Send + Sync + 'static>
335where
336    S: Subscriber,
337    for<'a> S: tracing_subscriber::registry::LookupSpan<'a>,
338{
339    tracing_subscriber::fmt::layer()
340        .compact()
341        .with_level(true)
342        .with_file(false)
343        .with_line_number(false)
344        .with_target(false)
345        .boxed()
346}
347
/// Builds the stdout layer used when the `tracing-forest` feature is on: a default
/// `ForestLayer`, boxed.
#[cfg(feature = "tracing-forest")]
fn stdout_layer<S>() -> Box<dyn tracing_subscriber::Layer<S> + Send + Sync + 'static>
where
    S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>,
{
    let forest = tracing_forest::ForestLayer::default();
    forest.boxed()
}
356
#[cfg(test)]
mod tests {
    use opentelemetry::Key;

    use super::*;

    /// Looks up `key` in `resource`.
    fn resource_value(resource: &Resource, key: &'static str) -> Option<Value> {
        resource.get(&Key::from_static_str(key))
    }

    /// Asserts that every `(key, value)` pair in `expected` is present in `resource`.
    fn assert_attributes(resource: &Resource, expected: &[(&'static str, &'static str)]) {
        for &(key, value) in expected {
            assert_eq!(resource_value(resource, key), Some(Value::from(value)));
        }
    }

    /// Builds a detected resource holding exactly the given string attributes.
    fn detected(attributes: &[(&'static str, &'static str)]) -> Resource {
        Resource::builder_empty()
            .with_attributes(attributes.iter().map(|&(key, value)| KeyValue::new(key, value)))
            .build()
    }

    #[test]
    fn resource_uses_configured_defaults() {
        let config = ResourceConfig::default()
            .with_name("node")
            .with_attribute("miden.node.role", "sequencer");
        let detected_resource = detected(&[("telemetry.sdk.language", "rust")]);

        let resource = resource_from_detected(config, &detected_resource, None);

        assert_attributes(
            &resource,
            &[
                ("service.name", "node"),
                ("service.namespace", "miden"),
                ("miden.node.role", "sequencer"),
                ("telemetry.sdk.language", "rust"),
            ],
        );
    }

    #[test]
    fn resource_prefers_detected_attributes_over_configured_defaults() {
        let config = ResourceConfig::default()
            .with_name("node")
            .with_attribute("miden.node.role", "sequencer");
        let detected_resource = detected(&[
            ("service.name", "custom-node"),
            ("service.namespace", "custom-namespace"),
            ("miden.node.role", "custom-role"),
        ]);

        let resource = resource_from_detected(config, &detected_resource, None);

        assert_attributes(
            &resource,
            &[
                ("service.name", "custom-node"),
                ("service.namespace", "custom-namespace"),
                ("miden.node.role", "custom-role"),
            ],
        );
    }

    #[test]
    fn resource_prefers_explicit_service_name_override() {
        let config = ResourceConfig::default().with_name("node");
        let detected_resource = detected(&[("service.name", "resource-attribute-node")]);
        let service_name_override = Some(Value::from("service-env-node"));

        let resource = resource_from_detected(config, &detected_resource, service_name_override);

        assert_attributes(&resource, &[("service.name", "service-env-node")]);
    }
}