Skip to main content

sentinel_proxy/
otel.rs

1//! OpenTelemetry integration for distributed tracing
2//!
3//! This module provides OpenTelemetry support with OTLP export for distributed tracing.
4//! It implements W3C Trace Context propagation (traceparent/tracestate headers).
5//!
6//! # Features
7//!
8//! - W3C Trace Context header propagation
9//! - OTLP export to Jaeger, Tempo, or any OTLP-compatible backend
10//! - Configurable sampling rates
11//! - Request lifecycle spans with semantic conventions
12//!
13//! # Configuration
14//!
15//! ```kdl
16//! observability {
17//!     tracing {
18//!         backend "otlp" {
19//!             endpoint "http://localhost:4317"
20//!         }
21//!         sampling-rate 0.1  // 10% of requests
22//!         service-name "sentinel"
23//!     }
24//! }
25//! ```
26
27use std::sync::OnceLock;
28use tracing::warn;
29
30use sentinel_config::TracingConfig;
31
/// Name of the W3C Trace Context `traceparent` header
/// (parsed by `TraceContext::parse_traceparent`).
pub const TRACEPARENT_HEADER: &str = "traceparent";
/// Name of the W3C Trace Context `tracestate` header.
pub const TRACESTATE_HEADER: &str = "tracestate";
35
36/// Parsed W3C Trace Context
37#[derive(Debug, Clone)]
38pub struct TraceContext {
39    /// Trace ID (32 hex chars)
40    pub trace_id: String,
41    /// Parent span ID (16 hex chars)
42    pub parent_id: String,
43    /// Whether this trace is sampled
44    pub sampled: bool,
45    /// Optional tracestate header value
46    pub tracestate: Option<String>,
47}
48
49impl TraceContext {
50    /// Parse W3C traceparent header
51    ///
52    /// Format: version-trace_id-parent_id-flags
53    /// Example: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01
54    pub fn parse_traceparent(header: &str) -> Option<Self> {
55        let parts: Vec<&str> = header.split('-').collect();
56        if parts.len() != 4 {
57            return None;
58        }
59
60        let version = parts[0];
61        if version != "00" {
62            // Only support version 00 for now
63            return None;
64        }
65
66        let trace_id = parts[1];
67        let parent_id = parts[2];
68        let flags = parts[3];
69
70        // Validate lengths
71        if trace_id.len() != 32 || parent_id.len() != 16 || flags.len() != 2 {
72            return None;
73        }
74
75        // Parse flags
76        let sampled = u8::from_str_radix(flags, 16).ok()? & 0x01 == 1;
77
78        Some(Self {
79            trace_id: trace_id.to_string(),
80            parent_id: parent_id.to_string(),
81            sampled,
82            tracestate: None,
83        })
84    }
85
86    /// Create traceparent header value
87    pub fn to_traceparent(&self, span_id: &str) -> String {
88        let flags = if self.sampled { "01" } else { "00" };
89        format!("00-{}-{}-{}", self.trace_id, span_id, flags)
90    }
91
92    /// Create a new trace context with generated IDs
93    pub fn new_root(sampled: bool) -> Self {
94        Self {
95            trace_id: generate_trace_id(),
96            parent_id: generate_span_id(),
97            sampled,
98            tracestate: None,
99        }
100    }
101}
102
103/// Generate a new trace ID (32 hex chars)
104pub fn generate_trace_id() -> String {
105    let bytes: [u8; 16] = rand::random();
106    hex::encode(bytes)
107}
108
109/// Generate a new span ID (16 hex chars)
110pub fn generate_span_id() -> String {
111    let bytes: [u8; 8] = rand::random();
112    hex::encode(bytes)
113}
114
/// Create a traceparent header value
///
/// Joins the fixed version `00`, the given trace ID, the given span ID and a
/// flags byte (`01` when `sampled`, otherwise `00`) with `-`.
pub fn create_traceparent(trace_id: &str, span_id: &str, sampled: bool) -> String {
    let flags = match sampled {
        true => "01",
        false => "00",
    };
    ["00", trace_id, span_id, flags].join("-")
}
120
121// ============================================================================
122// OpenTelemetry Tracer (when feature enabled)
123// ============================================================================
124
#[cfg(feature = "opentelemetry")]
mod otel_impl {
    use super::*;
    use opentelemetry::trace::{
        Span, SpanContext, SpanId, SpanKind, Status, TraceContextExt, TraceFlags, TraceId,
        TraceState, Tracer, TracerProvider as _,
    };
    use opentelemetry::{global, Context, KeyValue};
    use opentelemetry_otlp::WithExportConfig;
    use opentelemetry_sdk::trace::{Sampler, SdkTracerProvider};
    use opentelemetry_sdk::Resource;
    use tracing::{error, info};

    /// OpenTelemetry tracer wrapper
    ///
    /// Owns the SDK tracer provider created by [`OtelTracer::init`] so that it
    /// can be shut down later via [`OtelTracer::shutdown`].
    pub struct OtelTracer {
        provider: SdkTracerProvider,
        sampling_rate: f64,
        service_name: String,
    }

    impl OtelTracer {
        /// Initialize OpenTelemetry with OTLP exporter
        ///
        /// Builds an OTLP (tonic) span exporter for the configured endpoint,
        /// a sampler derived from `config.sampling_rate`, and a batch-exporting
        /// tracer provider, and installs that provider as the global one.
        ///
        /// Every backend variant is exported through the same OTLP exporter;
        /// only the endpoint is taken from the variant.
        ///
        /// # Errors
        ///
        /// Returns [`OtelError::ExporterInit`] when the exporter cannot be built.
        pub fn init(config: &TracingConfig) -> Result<Self, OtelError> {
            let endpoint = match &config.backend {
                sentinel_config::TracingBackend::Otlp { endpoint }
                | sentinel_config::TracingBackend::Jaeger { endpoint }
                | sentinel_config::TracingBackend::Zipkin { endpoint } => endpoint.clone(),
            };

            info!(
                endpoint = %endpoint,
                sampling_rate = config.sampling_rate,
                service_name = %config.service_name,
                "Initializing OpenTelemetry tracer"
            );

            // Create OTLP exporter
            let exporter = opentelemetry_otlp::SpanExporter::builder()
                .with_tonic()
                .with_endpoint(&endpoint)
                .build()
                .map_err(|e| OtelError::ExporterInit(e.to_string()))?;

            // Rates at or beyond the [0, 1] bounds collapse to always-on/off.
            let sampler = if config.sampling_rate >= 1.0 {
                Sampler::AlwaysOn
            } else if config.sampling_rate <= 0.0 {
                Sampler::AlwaysOff
            } else {
                Sampler::TraceIdRatioBased(config.sampling_rate)
            };

            // Create resource with service info
            let resource = Resource::builder()
                .with_service_name(config.service_name.clone())
                .build();

            // Build tracer provider
            let provider = SdkTracerProvider::builder()
                .with_batch_exporter(exporter)
                .with_sampler(sampler)
                .with_resource(resource)
                .build();

            // Set global provider; `start_span` obtains its tracer from it.
            global::set_tracer_provider(provider.clone());

            info!("OpenTelemetry tracer initialized successfully");

            Ok(Self {
                provider,
                sampling_rate: config.sampling_rate,
                service_name: config.service_name.clone(),
            })
        }

        /// Create a request span
        ///
        /// The span is named `"{method} {path}"` and has kind `Server`. When
        /// `trace_ctx` carries a usable trace/parent ID pair, it is attached as
        /// the remote parent so the new span joins the caller's trace.
        ///
        /// The returned [`RequestSpan`] reports the trace ID and span ID of the
        /// span that was actually started, so a `traceparent` built from them
        /// matches what is exported. If the started span has no valid context,
        /// the IDs fall back to the incoming trace ID (or a generated one) and
        /// a generated span ID.
        pub fn start_span(
            &self,
            method: &str,
            path: &str,
            trace_ctx: Option<&TraceContext>,
        ) -> RequestSpan {
            let tracer = global::tracer("sentinel-proxy");

            let builder = tracer
                .span_builder(format!("{} {}", method, path))
                .with_kind(SpanKind::Server)
                .with_attributes([
                    KeyValue::new("http.method", method.to_string()),
                    KeyValue::new("http.target", path.to_string()),
                    KeyValue::new("service.name", self.service_name.clone()),
                ]);

            let span = match trace_ctx.and_then(remote_parent) {
                Some(parent) => {
                    let parent_cx = Context::new().with_remote_span_context(parent);
                    builder.start_with_context(&tracer, &parent_cx)
                }
                None => builder.start(&tracer),
            };

            let started = span.span_context();
            let (trace_id, span_id) = if started.is_valid() {
                (started.trace_id().to_string(), started.span_id().to_string())
            } else {
                (
                    trace_ctx
                        .map(|c| c.trace_id.clone())
                        .unwrap_or_else(generate_trace_id),
                    generate_span_id(),
                )
            };

            RequestSpan {
                span,
                trace_id,
                span_id,
            }
        }

        /// Shutdown the tracer
        ///
        /// Shuts down the owned provider; a failure is logged, not returned.
        pub fn shutdown(&self) {
            info!("Shutting down OpenTelemetry tracer");
            if let Err(e) = self.provider.shutdown() {
                error!(error = %e, "Failed to shutdown OpenTelemetry tracer provider");
            }
        }
    }

    /// Convert an incoming [`TraceContext`] into a remote parent span context.
    ///
    /// Returns `None` when the IDs are not valid hex or form an invalid
    /// (all-zero) context, in which case the caller starts a span without it.
    fn remote_parent(ctx: &TraceContext) -> Option<SpanContext> {
        let trace_id = TraceId::from_hex(&ctx.trace_id).ok()?;
        let span_id = SpanId::from_hex(&ctx.parent_id).ok()?;
        let flags = if ctx.sampled {
            TraceFlags::SAMPLED
        } else {
            TraceFlags::default()
        };
        // An unparsable tracestate is dropped rather than failing the request.
        let state = ctx
            .tracestate
            .as_deref()
            .and_then(|raw| raw.parse::<TraceState>().ok())
            .unwrap_or_default();

        let parent = SpanContext::new(trace_id, span_id, flags, true, state);
        parent.is_valid().then_some(parent)
    }

    /// Request span wrapper
    ///
    /// Pairs the live OpenTelemetry span with its trace and span IDs as
    /// strings, for building outgoing `traceparent` headers.
    pub struct RequestSpan {
        span: opentelemetry::global::BoxedSpan,
        pub trace_id: String,
        pub span_id: String,
    }

    impl RequestSpan {
        /// Record the HTTP response status on the span.
        ///
        /// Always sets the `http.status_code` attribute; statuses of 500 and
        /// above additionally mark the span status as an error.
        pub fn set_status(&mut self, status_code: u16) {
            self.span
                .set_attribute(KeyValue::new("http.status_code", i64::from(status_code)));
            if status_code >= 500 {
                self.span
                    .set_status(Status::error(format!("HTTP {}", status_code)));
            }
        }

        /// Record an error on the span.
        ///
        /// Adds an `exception` event carrying the message and marks the span
        /// status as an error with the same message.
        pub fn record_error(&mut self, error: &str) {
            self.span.add_event(
                "exception",
                vec![KeyValue::new("exception.message", error.to_string())],
            );
            self.span.set_status(Status::error(error.to_string()));
        }

        /// Record which upstream served the request.
        ///
        /// Stored as the `upstream.name` and `upstream.address` attributes.
        pub fn set_upstream(&mut self, upstream: &str, address: &str) {
            self.span
                .set_attribute(KeyValue::new("upstream.name", upstream.to_string()));
            self.span
                .set_attribute(KeyValue::new("upstream.address", address.to_string()));
        }

        /// Finish the span.
        ///
        /// Ends it explicitly instead of relying on drop.
        pub fn end(mut self) {
            self.span.end();
        }
    }
}
260
261// ============================================================================
262// Stub implementations when feature is disabled
263// ============================================================================
264
265#[cfg(not(feature = "opentelemetry"))]
266mod otel_impl {
267    use super::*;
268
269    pub struct OtelTracer;
270
271    impl OtelTracer {
272        pub fn init(_config: &TracingConfig) -> Result<Self, OtelError> {
273            warn!("OpenTelemetry feature not enabled, tracing disabled");
274            Err(OtelError::TracerInit(
275                "OpenTelemetry feature not enabled".to_string(),
276            ))
277        }
278
279        pub fn start_span(
280            &self,
281            _method: &str,
282            _path: &str,
283            trace_ctx: Option<&TraceContext>,
284        ) -> RequestSpan {
285            RequestSpan {
286                trace_id: trace_ctx
287                    .map(|c| c.trace_id.clone())
288                    .unwrap_or_else(generate_trace_id),
289                span_id: generate_span_id(),
290            }
291        }
292
293        pub fn shutdown(&self) {}
294    }
295
296    pub struct RequestSpan {
297        pub trace_id: String,
298        pub span_id: String,
299    }
300
301    impl RequestSpan {
302        pub fn set_status(&mut self, _status_code: u16) {}
303        pub fn record_error(&mut self, _error: &str) {}
304        pub fn set_upstream(&mut self, _upstream: &str, _address: &str) {}
305        pub fn end(self) {}
306    }
307}
308
309// Re-export from the appropriate module
310pub use otel_impl::{OtelTracer, RequestSpan};
311
/// OpenTelemetry error types
#[derive(Debug)]
pub enum OtelError {
    /// The OTLP exporter could not be initialized; carries the reason.
    ExporterInit(String),
    /// The tracer could not be initialized; carries the reason.
    TracerInit(String),
}

impl std::fmt::Display for OtelError {
    /// Writes a fixed prefix naming the failed step, followed by the reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ExporterInit(reason) => {
                f.write_fmt(format_args!("Failed to initialize OTLP exporter: {}", reason))
            }
            Self::TracerInit(reason) => {
                f.write_fmt(format_args!("Failed to initialize tracer: {}", reason))
            }
        }
    }
}

impl std::error::Error for OtelError {}
329
330// ============================================================================
331// Global tracer instance
332// ============================================================================
333
334static GLOBAL_TRACER: OnceLock<Option<OtelTracer>> = OnceLock::new();
335
336/// Initialize the global tracer
337pub fn init_tracer(config: &TracingConfig) -> Result<(), OtelError> {
338    let tracer = OtelTracer::init(config)?;
339    GLOBAL_TRACER
340        .set(Some(tracer))
341        .map_err(|_| OtelError::TracerInit("Global tracer already initialized".to_string()))?;
342    Ok(())
343}
344
345/// Get the global tracer
346pub fn get_tracer() -> Option<&'static OtelTracer> {
347    GLOBAL_TRACER.get().and_then(|t| t.as_ref())
348}
349
350/// Shutdown the global tracer
351pub fn shutdown_tracer() {
352    if let Some(Some(tracer)) = GLOBAL_TRACER.get() {
353        tracer.shutdown();
354    }
355}
356
357// ============================================================================
358// Tests
359// ============================================================================
360
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixtures: the example IDs and headers used throughout the tests.
    const TRACE_ID: &str = "0af7651916cd43dd8448eb211c80319c";
    const SPAN_ID: &str = "b7ad6b7169203331";
    const SAMPLED_HEADER: &str = "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01";
    const UNSAMPLED_HEADER: &str = "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-00";

    /// True when `id` is exactly `len` ASCII hex digits.
    fn is_hex_of_len(id: &str, len: usize) -> bool {
        id.len() == len && id.chars().all(|c| c.is_ascii_hexdigit())
    }

    #[test]
    fn test_parse_valid_traceparent() {
        let parsed = TraceContext::parse_traceparent(SAMPLED_HEADER).unwrap();

        assert_eq!(parsed.trace_id, TRACE_ID);
        assert_eq!(parsed.parent_id, SPAN_ID);
        assert!(parsed.sampled);
    }

    #[test]
    fn test_parse_unsampled_traceparent() {
        let parsed = TraceContext::parse_traceparent(UNSAMPLED_HEADER).unwrap();

        assert!(!parsed.sampled);
    }

    #[test]
    fn test_parse_invalid_traceparent() {
        let rejected = [
            // Invalid version
            "01-abc-def-00",
            // Wrong number of parts
            "00-abc-def",
            // Wrong trace_id length
            "00-abc-b7ad6b7169203331-01",
        ];

        for header in rejected {
            assert!(TraceContext::parse_traceparent(header).is_none());
        }
    }

    #[test]
    fn test_trace_context_to_traceparent() {
        let ctx = TraceContext {
            trace_id: TRACE_ID.to_string(),
            parent_id: SPAN_ID.to_string(),
            sampled: true,
            tracestate: None,
        };

        let rendered = ctx.to_traceparent("1234567890abcdef");

        assert_eq!(
            rendered,
            "00-0af7651916cd43dd8448eb211c80319c-1234567890abcdef-01"
        );
    }

    #[test]
    fn test_generate_trace_id() {
        let id = generate_trace_id();
        assert_eq!(id.len(), 32);
        assert!(is_hex_of_len(&id, 32));
    }

    #[test]
    fn test_generate_span_id() {
        let id = generate_span_id();
        assert_eq!(id.len(), 16);
        assert!(is_hex_of_len(&id, 16));
    }

    #[test]
    fn test_create_traceparent() {
        let rendered = create_traceparent(TRACE_ID, SPAN_ID, true);
        assert_eq!(rendered, SAMPLED_HEADER);
    }

    #[test]
    fn test_new_root_trace_context() {
        let root = TraceContext::new_root(true);
        assert_eq!(root.trace_id.len(), 32);
        assert_eq!(root.parent_id.len(), 16);
        assert!(root.sampled);
    }
}