Skip to main content

edict/
telemetry.rs

//! Telemetry initialization.
//!
//! Controlled by `OTEL_EXPORTER_OTLP_ENDPOINT` (the standard OTLP env var):
//! - unset or empty → no-op (tracing disabled, zero overhead)
//! - `"stderr"` → JSON spans/events to stderr (non-standard extension)
//! - `"http://..."` → OTLP HTTP export (traces, logs, and metrics) to the
//!   given endpoint
//!
//! ## Distributed tracing
//!
//! If `TRACEPARENT` is set (W3C Trace Context format), spans are created as
//! children of the remote parent. Use [`current_traceparent`] to extract the
//! current span context for propagation to child processes.
13
14use tracing_subscriber::EnvFilter;
15
/// Handle returned by [`init`] that owns the telemetry providers.
///
/// Dropping it shuts down every provider it still holds, so keep it alive in
/// `main()` until the program is about to exit. Without the `otel` feature the
/// struct has no fields and dropping it does nothing.
pub struct TelemetryGuard {
    /// Span pipeline; `None` when no OTLP trace export was set up.
    #[cfg(feature = "otel")]
    trace_provider: Option<opentelemetry_sdk::trace::SdkTracerProvider>,
    /// Log pipeline; `None` when no OTLP log export was set up.
    #[cfg(feature = "otel")]
    log_provider: Option<opentelemetry_sdk::logs::SdkLoggerProvider>,
    /// Metrics pipeline; `None` when no OTLP metric export was set up.
    #[cfg(feature = "otel")]
    meter_provider: Option<opentelemetry_sdk::metrics::SdkMeterProvider>,
}
26
27impl Drop for TelemetryGuard {
28    fn drop(&mut self) {
29        #[cfg(feature = "otel")]
30        {
31            if let Some(provider) = self.trace_provider.take()
32                && let Err(e) = provider.shutdown()
33            {
34                eprintln!("otel trace shutdown error: {e}");
35            }
36            if let Some(provider) = self.log_provider.take()
37                && let Err(e) = provider.shutdown()
38            {
39                eprintln!("otel log shutdown error: {e}");
40            }
41            if let Some(provider) = self.meter_provider.take()
42                && let Err(e) = provider.shutdown()
43            {
44                eprintln!("otel meter shutdown error: {e}");
45            }
46        }
47    }
48}
49
50/// Initialize telemetry based on `OTEL_EXPORTER_OTLP_ENDPOINT`.
51///
52/// Returns a guard that must be held until the program exits.
53/// Dropping the guard flushes any pending spans and logs.
54#[must_use]
55pub fn init() -> TelemetryGuard {
56    let endpoint = std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT").ok();
57
58    match endpoint.as_deref() {
59        None | Some("") => init_noop(),
60        Some("stderr") => init_stderr(),
61        #[cfg(feature = "otel")]
62        Some(_) => init_otlp(),
63        #[cfg(not(feature = "otel"))]
64        Some(_) => {
65            eprintln!("warning: OTEL_EXPORTER_OTLP_ENDPOINT set but edict built without 'otel' feature");
66            init_noop()
67        }
68    }
69}
70
71const fn init_noop() -> TelemetryGuard {
72    TelemetryGuard {
73        #[cfg(feature = "otel")]
74        trace_provider: None,
75        #[cfg(feature = "otel")]
76        log_provider: None,
77        #[cfg(feature = "otel")]
78        meter_provider: None,
79    }
80}
81
82/// JSON spans/events to stderr via tracing-subscriber's JSON formatter.
83fn init_stderr() -> TelemetryGuard {
84    use tracing_subscriber::layer::SubscriberExt as _;
85    use tracing_subscriber::util::SubscriberInitExt as _;
86
87    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
88
89    tracing_subscriber::registry()
90        .with(filter)
91        .with(
92            tracing_subscriber::fmt::layer()
93                .json()
94                .with_writer(std::io::stderr)
95                .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
96        )
97        .init();
98
99    TelemetryGuard {
100        #[cfg(feature = "otel")]
101        trace_provider: None,
102        #[cfg(feature = "otel")]
103        log_provider: None,
104        #[cfg(feature = "otel")]
105        meter_provider: None,
106    }
107}
108
/// OTLP HTTP export (traces + logs + metrics).
///
/// The SDK reads `OTEL_EXPORTER_OTLP_ENDPOINT` from the environment natively
/// and appends the per-signal path (e.g. `/v1/traces` or `/v1/logs`) as
/// appropriate.
///
/// Setup is best-effort and never panics:
/// - if any of the three exporters cannot be built, a warning is printed and a
///   no-op guard is returned (providers built so far are dropped);
/// - if the global `tracing` subscriber cannot be installed (one is already
///   set), a warning is printed and the providers are still returned so they
///   are shut down when the guard drops.
///
/// The meter provider is also registered as the global meter provider.
#[cfg(feature = "otel")]
fn init_otlp() -> TelemetryGuard {
    use opentelemetry::trace::TracerProvider as _;
    use tracing_subscriber::layer::SubscriberExt as _;
    use tracing_subscriber::util::SubscriberInitExt as _;

    // --- Traces ---
    let span_exporter = match opentelemetry_otlp::SpanExporter::builder()
        .with_http()
        .build()
    {
        Ok(exporter) => exporter,
        Err(e) => {
            eprintln!("warning: failed to init OTLP span exporter: {e}");
            return init_noop();
        }
    };

    // One resource description shared by all three signal pipelines.
    let resource = otel_resource();

    let trace_provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
        .with_simple_exporter(span_exporter)
        .with_resource(resource.clone())
        .build();

    let tracer = trace_provider.tracer(env!("CARGO_PKG_NAME"));
    let trace_layer = tracing_opentelemetry::layer().with_tracer(tracer);

    // --- Logs ---
    let log_exporter = match opentelemetry_otlp::LogExporter::builder()
        .with_http()
        .build()
    {
        Ok(exporter) => exporter,
        Err(e) => {
            eprintln!("warning: failed to init OTLP log exporter: {e}");
            return init_noop();
        }
    };

    let log_provider = opentelemetry_sdk::logs::SdkLoggerProvider::builder()
        .with_simple_exporter(log_exporter)
        .with_resource(resource.clone())
        .build();

    // Bridges `tracing` events into the OTel log pipeline.
    let log_layer = opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge::new(
        &log_provider,
    );

    // --- Metrics ---
    let metric_exporter = match opentelemetry_otlp::MetricExporter::builder()
        .with_http()
        .build()
    {
        Ok(exporter) => exporter,
        Err(e) => {
            eprintln!("warning: failed to init OTLP metric exporter: {e}");
            return init_noop();
        }
    };

    let meter_provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder()
        .with_periodic_exporter(metric_exporter)
        .with_resource(resource)
        .build();

    // The guard keeps its own handle so it can shut the provider down on drop.
    opentelemetry::global::set_meter_provider(meter_provider.clone());

    // --- Parent context (distributed tracing) ---
    install_parent_context();

    // --- Subscriber ---
    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));

    // `try_init` rather than `init`: an already-installed subscriber is
    // reported as a warning instead of a panic, consistent with the exporter
    // failure handling above.
    if let Err(e) = tracing_subscriber::registry()
        .with(filter)
        .with(trace_layer)
        .with(log_layer)
        .try_init()
    {
        eprintln!("warning: failed to install tracing subscriber: {e}");
    }

    TelemetryGuard {
        trace_provider: Some(trace_provider),
        log_provider: Some(log_provider),
        meter_provider: Some(meter_provider),
    }
}
199
/// Render the current trace context as a W3C `traceparent` value.
///
/// Returns `None` when the propagator writes no `traceparent` entry (for
/// example because there is no valid span context). Intended for handing the
/// trace context to child processes through an environment variable.
///
/// NOTE(review): `inject` serializes the ambient `opentelemetry::Context`.
/// Whether that context tracks the active `tracing` span depends on how the
/// tracing-opentelemetry layer is configured — confirm that this returns the
/// current span rather than only the parent installed at startup.
#[cfg(feature = "otel")]
pub fn current_traceparent() -> Option<String> {
    use opentelemetry::propagation::TextMapPropagator as _;
    use opentelemetry_sdk::propagation::TraceContextPropagator;
    use std::collections::HashMap;

    let mut headers = HashMap::<String, String>::new();
    TraceContextPropagator::new().inject(&mut headers);
    headers.remove("traceparent")
}
216
/// Fallback used when the crate is built without the `otel` feature.
///
/// There is no trace context to propagate, so this always yields `None`.
#[cfg(not(feature = "otel"))]
pub fn current_traceparent() -> Option<String> {
    Option::None
}
222
/// Adopt a remote parent from the `TRACEPARENT` environment variable.
///
/// When the variable is set, its value is run through the W3C trace-context
/// propagator and the resulting context is attached as the current OTel
/// context, so that subsequently created root-level spans can be parented to
/// it. Does nothing when the variable is unset or unreadable.
#[cfg(feature = "otel")]
fn install_parent_context() {
    use opentelemetry::propagation::TextMapPropagator as _;
    use opentelemetry_sdk::propagation::TraceContextPropagator;
    use std::collections::HashMap;

    let Ok(traceparent) = std::env::var("TRACEPARENT") else {
        return;
    };

    let carrier = HashMap::from([("traceparent".to_string(), traceparent)]);
    let parent_cx = TraceContextPropagator::new().extract(&carrier);

    // Deliberately leak the attach guard: dropping it would detach the
    // context again, and it is meant to stay active for the rest of the run.
    // NOTE(review): the attachment is presumably scoped to the calling
    // thread — confirm spans started on other threads see this parent.
    std::mem::forget(parent_cx.attach());
}
244
/// Build the resource description attached to every exported signal.
///
/// Sets `service.name` and `service.version` from the crate's Cargo package
/// name and version, captured at compile time.
#[cfg(feature = "otel")]
fn otel_resource() -> opentelemetry_sdk::Resource {
    use opentelemetry::KeyValue;

    let service_name = KeyValue::new("service.name", env!("CARGO_PKG_NAME"));
    let service_version = KeyValue::new("service.version", env!("CARGO_PKG_VERSION"));

    opentelemetry_sdk::Resource::builder()
        .with_attribute(service_name)
        .with_attribute(service_version)
        .build()
}
253
254// ---------------------------------------------------------------------------
255// Metrics helpers — thin wrappers so call sites don't need #[cfg] blocks
256// ---------------------------------------------------------------------------
257
/// Small metrics facade backed by the global OpenTelemetry meter.
///
/// A sibling module with the same signatures and empty bodies is compiled when
/// the `otel` feature is off, so call sites never need `#[cfg]`.
///
/// NOTE(review): each call looks up the meter and builds the instrument
/// again; if these helpers end up on a hot path, consider caching instruments.
#[cfg(feature = "otel")]
pub mod metrics {
    use opentelemetry::KeyValue;
    use std::time::Instant;

    /// Turn borrowed `(key, value)` pairs into owned OTel attributes.
    fn to_key_values(attrs: &[(&'static str, &str)]) -> Vec<KeyValue> {
        attrs
            .iter()
            .map(|&(key, value)| KeyValue::new(key, value.to_owned()))
            .collect()
    }

    /// Add `value` to the `u64` counter called `name`, tagged with `attrs`.
    pub fn counter(name: &'static str, value: u64, attrs: &[(&'static str, &str)]) {
        let meter = opentelemetry::global::meter("edict");
        let labels = to_key_values(attrs);
        let instrument = meter.u64_counter(name).build();
        instrument.add(value, &labels);
    }

    /// Record `value` on the `f64` histogram called `name`, tagged with `attrs`.
    pub fn histogram(name: &'static str, value: f64, attrs: &[(&'static str, &str)]) {
        let meter = opentelemetry::global::meter("edict");
        let labels = to_key_values(attrs);
        let instrument = meter.f64_histogram(name).build();
        instrument.record(value, &labels);
    }

    /// Mark the beginning of a timed section; pass the result to
    /// [`time_record`].
    #[must_use]
    pub fn time_start() -> Instant {
        Instant::now()
    }

    /// Record the seconds elapsed since `start` on the histogram `name`.
    pub fn time_record(name: &'static str, start: Instant, attrs: &[(&'static str, &str)]) {
        let elapsed_secs = start.elapsed().as_secs_f64();
        histogram(name, elapsed_secs, attrs);
    }
}
295
/// No-op metrics facade used when the `otel` feature is disabled.
///
/// Mirrors the signatures of the OTel-backed module so call sites compile
/// unchanged; nothing is recorded.
#[cfg(not(feature = "otel"))]
pub mod metrics {
    use std::time::Instant;

    /// Counter increment; arguments are accepted and discarded.
    pub fn counter(name: &'static str, value: u64, attrs: &[(&'static str, &str)]) {
        let _ = (name, value, attrs);
    }

    /// Histogram observation; arguments are accepted and discarded.
    pub fn histogram(name: &'static str, value: f64, attrs: &[(&'static str, &str)]) {
        let _ = (name, value, attrs);
    }

    /// Mark the beginning of a timed section.
    #[must_use]
    pub fn time_start() -> Instant {
        Instant::now()
    }

    /// Elapsed-time recording; arguments are accepted and discarded.
    pub fn time_record(name: &'static str, start: Instant, attrs: &[(&'static str, &str)]) {
        let _ = (name, start, attrs);
    }
}