sentinel_proxy/
otel.rs

1//! OpenTelemetry integration for distributed tracing
2//!
3//! This module provides OpenTelemetry support with OTLP export for distributed tracing.
4//! It implements W3C Trace Context propagation (traceparent/tracestate headers).
5//!
6//! # Features
7//!
8//! - W3C Trace Context header propagation
9//! - OTLP export to Jaeger, Tempo, or any OTLP-compatible backend
10//! - Configurable sampling rates
11//! - Request lifecycle spans with semantic conventions
12//!
13//! # Configuration
14//!
15//! ```kdl
16//! observability {
17//!     tracing {
18//!         backend "otlp" {
19//!             endpoint "http://localhost:4317"
20//!         }
21//!         sampling-rate 0.1  // 10% of requests
22//!         service-name "sentinel"
23//!     }
24//! }
25//! ```
26
27use std::sync::OnceLock;
28use tracing::warn;
29
30use sentinel_config::TracingConfig;
31
32/// W3C Trace Context header names
33pub const TRACEPARENT_HEADER: &str = "traceparent";
34pub const TRACESTATE_HEADER: &str = "tracestate";
35
/// A W3C Trace Context attached to a request.
///
/// Carries the identifiers found in a `traceparent` header (or generated
/// for a new root trace) plus an optional raw `tracestate` value.
#[derive(Clone, Debug)]
pub struct TraceContext {
    /// Trace identifier, expected to be 32 hexadecimal characters.
    pub trace_id: String,
    /// Identifier of the parent span, expected to be 16 hexadecimal characters.
    pub parent_id: String,
    /// `true` when the sampled flag is set for this trace.
    pub sampled: bool,
    /// Raw `tracestate` header value, when one is attached.
    pub tracestate: Option<String>,
}
48
49impl TraceContext {
50    /// Parse W3C traceparent header
51    ///
52    /// Format: version-trace_id-parent_id-flags
53    /// Example: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01
54    pub fn parse_traceparent(header: &str) -> Option<Self> {
55        let parts: Vec<&str> = header.split('-').collect();
56        if parts.len() != 4 {
57            return None;
58        }
59
60        let version = parts[0];
61        if version != "00" {
62            // Only support version 00 for now
63            return None;
64        }
65
66        let trace_id = parts[1];
67        let parent_id = parts[2];
68        let flags = parts[3];
69
70        // Validate lengths
71        if trace_id.len() != 32 || parent_id.len() != 16 || flags.len() != 2 {
72            return None;
73        }
74
75        // Parse flags
76        let sampled = u8::from_str_radix(flags, 16).ok()? & 0x01 == 1;
77
78        Some(Self {
79            trace_id: trace_id.to_string(),
80            parent_id: parent_id.to_string(),
81            sampled,
82            tracestate: None,
83        })
84    }
85
86    /// Create traceparent header value
87    pub fn to_traceparent(&self, span_id: &str) -> String {
88        let flags = if self.sampled { "01" } else { "00" };
89        format!("00-{}-{}-{}", self.trace_id, span_id, flags)
90    }
91
92    /// Create a new trace context with generated IDs
93    pub fn new_root(sampled: bool) -> Self {
94        Self {
95            trace_id: generate_trace_id(),
96            parent_id: generate_span_id(),
97            sampled,
98            tracestate: None,
99        }
100    }
101}
102
103/// Generate a new trace ID (32 hex chars)
104pub fn generate_trace_id() -> String {
105    let bytes: [u8; 16] = rand::random();
106    hex::encode(bytes)
107}
108
109/// Generate a new span ID (16 hex chars)
110pub fn generate_span_id() -> String {
111    let bytes: [u8; 8] = rand::random();
112    hex::encode(bytes)
113}
114
/// Assemble a version-`00` `traceparent` header value from its parts.
///
/// The result is `00-{trace_id}-{span_id}-{flags}`, where `flags` is `01`
/// when `sampled` is true and `00` otherwise. Both IDs are inserted verbatim.
pub fn create_traceparent(trace_id: &str, span_id: &str, sampled: bool) -> String {
    // The sampled flag is bit 0 of the flags byte, rendered as two hex digits.
    let flag_byte = u8::from(sampled);
    format!("00-{trace_id}-{span_id}-{flag_byte:02x}")
}
120
121// ============================================================================
122// OpenTelemetry Tracer (when feature enabled)
123// ============================================================================
124
#[cfg(feature = "opentelemetry")]
mod otel_impl {
    use super::*;
    use opentelemetry::trace::{
        Span, SpanContext, SpanId, SpanKind, Status, TraceContextExt, TraceFlags, TraceId,
        TraceState, Tracer,
    };
    use opentelemetry::{global, Context, KeyValue};
    use opentelemetry_otlp::WithExportConfig;
    use opentelemetry_sdk::trace::Sampler;
    use opentelemetry_sdk::Resource;
    use std::str::FromStr;
    // `info!` is used below; the parent module only brings `warn` into scope.
    use tracing::info;

    /// OpenTelemetry tracer wrapper.
    ///
    /// Constructing one via [`OtelTracer::init`] installs a global tracer
    /// provider; spans are then created through that global provider.
    pub struct OtelTracer {
        /// Sampling rate copied from the configuration at init time.
        sampling_rate: f64,
        /// Service name attached to the resource and to every request span.
        service_name: String,
    }

    impl OtelTracer {
        /// Initialize OpenTelemetry with an OTLP exporter.
        ///
        /// Builds a tonic-based OTLP span exporter for the configured
        /// endpoint, picks a sampler from `config.sampling_rate`
        /// (`>= 1.0` always on, `<= 0.0` always off, otherwise
        /// trace-ID-ratio based), and installs a batch-exporting tracer
        /// provider as the process-global provider.
        ///
        /// Whatever backend variant is configured, its endpoint is handed
        /// to the same OTLP exporter.
        ///
        /// # Errors
        ///
        /// Returns [`OtelError::ExporterInit`] when the exporter cannot be
        /// built.
        pub fn init(config: &TracingConfig) -> Result<Self, OtelError> {
            let endpoint = match &config.backend {
                sentinel_config::TracingBackend::Otlp { endpoint }
                | sentinel_config::TracingBackend::Jaeger { endpoint }
                | sentinel_config::TracingBackend::Zipkin { endpoint } => endpoint.clone(),
            };

            info!(
                endpoint = %endpoint,
                sampling_rate = config.sampling_rate,
                service_name = %config.service_name,
                "Initializing OpenTelemetry tracer"
            );

            // Create OTLP exporter
            let exporter = opentelemetry_otlp::SpanExporter::builder()
                .with_tonic()
                .with_endpoint(&endpoint)
                .build()
                .map_err(|e| OtelError::ExporterInit(e.to_string()))?;

            // Create sampler based on sampling rate
            let sampler = if config.sampling_rate >= 1.0 {
                Sampler::AlwaysOn
            } else if config.sampling_rate <= 0.0 {
                Sampler::AlwaysOff
            } else {
                Sampler::TraceIdRatioBased(config.sampling_rate)
            };

            // Create resource with service info
            let resource =
                Resource::new([KeyValue::new("service.name", config.service_name.clone())]);

            // Build tracer provider
            let provider = opentelemetry_sdk::trace::TracerProvider::builder()
                .with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio)
                .with_sampler(sampler)
                .with_resource(resource)
                .build();

            // Set global provider
            global::set_tracer_provider(provider);

            info!("OpenTelemetry tracer initialized successfully");

            Ok(Self {
                sampling_rate: config.sampling_rate,
                service_name: config.service_name.clone(),
            })
        }

        /// Create a server-kind span for one request.
        ///
        /// The span is named `"{method} {path}"`. When `trace_ctx` holds
        /// identifiers that parse as a valid span context, the span is
        /// started as a child of that remote parent; otherwise it is
        /// started against the current context.
        ///
        /// The returned [`RequestSpan`] reports the IDs of the span that
        /// was actually created, so values propagated downstream match
        /// what is exported. If the created span has no valid context,
        /// the IDs fall back to the incoming trace ID (or a generated one)
        /// and a generated span ID.
        pub fn start_span(
            &self,
            method: &str,
            path: &str,
            trace_ctx: Option<&TraceContext>,
        ) -> RequestSpan {
            let tracer = global::tracer("sentinel-proxy");

            let builder = tracer
                .span_builder(format!("{} {}", method, path))
                .with_kind(SpanKind::Server)
                .with_attributes([
                    KeyValue::new("http.method", method.to_string()),
                    KeyValue::new("http.target", path.to_string()),
                    KeyValue::new("service.name", self.service_name.clone()),
                ]);

            // Join the caller's trace when a usable parent was supplied.
            let span = match trace_ctx.and_then(remote_parent_context) {
                Some(parent_cx) => builder.start_with_context(&tracer, &parent_cx),
                None => builder.start(&tracer),
            };

            let created = span.span_context();
            let (trace_id, span_id) = if created.is_valid() {
                (
                    format!("{:032x}", created.trace_id()),
                    format!("{:016x}", created.span_id()),
                )
            } else {
                (
                    trace_ctx
                        .map(|c| c.trace_id.clone())
                        .unwrap_or_else(generate_trace_id),
                    generate_span_id(),
                )
            };

            RequestSpan {
                span,
                trace_id,
                span_id,
            }
        }

        /// Shut down the global tracer provider.
        pub fn shutdown(&self) {
            info!("Shutting down OpenTelemetry tracer");
            global::shutdown_tracer_provider();
        }
    }

    /// Build a parent context from an incoming [`TraceContext`].
    ///
    /// Returns `None` when the IDs are not valid hex or describe an invalid
    /// (for example all-zero) span context. An unparsable `tracestate` is
    /// dropped rather than failing the whole conversion.
    fn remote_parent_context(ctx: &TraceContext) -> Option<Context> {
        let trace_id = TraceId::from_hex(&ctx.trace_id).ok()?;
        let span_id = SpanId::from_hex(&ctx.parent_id).ok()?;
        let flags = if ctx.sampled {
            TraceFlags::SAMPLED
        } else {
            TraceFlags::default()
        };
        let state = ctx
            .tracestate
            .as_deref()
            .and_then(|raw| TraceState::from_str(raw).ok())
            .unwrap_or_default();

        // `true` marks the parent as remote: it arrived over the wire.
        let parent = SpanContext::new(trace_id, span_id, flags, true, state);
        parent
            .is_valid()
            .then(|| Context::new().with_remote_span_context(parent))
    }

    /// Request span wrapper.
    ///
    /// Owns the underlying OpenTelemetry span and exposes the trace and
    /// span IDs as hex strings for header propagation.
    pub struct RequestSpan {
        span: opentelemetry::global::BoxedSpan,
        pub trace_id: String,
        pub span_id: String,
    }

    impl RequestSpan {
        /// Record the HTTP response status on the span.
        ///
        /// Sets the `http.status_code` attribute; status codes of 500 and
        /// above additionally mark the span as errored.
        pub fn set_status(&mut self, status_code: u16) {
            self.span.set_attribute(KeyValue::new(
                "http.status_code",
                i64::from(status_code),
            ));
            if status_code >= 500 {
                self.span
                    .set_status(Status::error(format!("HTTP {}", status_code)));
            }
        }

        /// Mark the span as errored with `error` as the description.
        pub fn record_error(&mut self, error: &str) {
            self.span.set_status(Status::error(error.to_string()));
        }

        /// Record which upstream served the request.
        ///
        /// Sets the `upstream.name` and `upstream.address` attributes.
        pub fn set_upstream(&mut self, upstream: &str, address: &str) {
            self.span
                .set_attribute(KeyValue::new("upstream.name", upstream.to_string()));
            self.span
                .set_attribute(KeyValue::new("upstream.address", address.to_string()));
        }

        /// End the span, consuming the wrapper.
        pub fn end(mut self) {
            self.span.end();
        }
    }
}
256
257// ============================================================================
258// Stub implementations when feature is disabled
259// ============================================================================
260
261#[cfg(not(feature = "opentelemetry"))]
262mod otel_impl {
263    use super::*;
264
265    pub struct OtelTracer;
266
267    impl OtelTracer {
268        pub fn init(_config: &TracingConfig) -> Result<Self, OtelError> {
269            warn!("OpenTelemetry feature not enabled, tracing disabled");
270            Err(OtelError::TracerInit(
271                "OpenTelemetry feature not enabled".to_string(),
272            ))
273        }
274
275        pub fn start_span(
276            &self,
277            _method: &str,
278            _path: &str,
279            trace_ctx: Option<&TraceContext>,
280        ) -> RequestSpan {
281            RequestSpan {
282                trace_id: trace_ctx
283                    .map(|c| c.trace_id.clone())
284                    .unwrap_or_else(generate_trace_id),
285                span_id: generate_span_id(),
286            }
287        }
288
289        pub fn shutdown(&self) {}
290    }
291
292    pub struct RequestSpan {
293        pub trace_id: String,
294        pub span_id: String,
295    }
296
297    impl RequestSpan {
298        pub fn set_status(&mut self, _status_code: u16) {}
299        pub fn record_error(&mut self, _error: &str) {}
300        pub fn set_upstream(&mut self, _upstream: &str, _address: &str) {}
301        pub fn end(self) {}
302    }
303}
304
305// Re-export from the appropriate module
306pub use otel_impl::{OtelTracer, RequestSpan};
307
/// Errors raised while setting up OpenTelemetry.
#[derive(Debug)]
pub enum OtelError {
    /// The OTLP exporter could not be initialized; carries the reason.
    ExporterInit(String),
    /// The tracer could not be initialized; carries the reason.
    TracerInit(String),
}

impl std::fmt::Display for OtelError {
    /// Render a human-readable message that includes the stored reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ExporterInit(reason) => {
                write!(f, "Failed to initialize OTLP exporter: {}", reason)
            }
            Self::TracerInit(reason) => {
                write!(f, "Failed to initialize tracer: {}", reason)
            }
        }
    }
}

impl std::error::Error for OtelError {}
325
326// ============================================================================
327// Global tracer instance
328// ============================================================================
329
330static GLOBAL_TRACER: OnceLock<Option<OtelTracer>> = OnceLock::new();
331
332/// Initialize the global tracer
333pub fn init_tracer(config: &TracingConfig) -> Result<(), OtelError> {
334    let tracer = OtelTracer::init(config)?;
335    GLOBAL_TRACER
336        .set(Some(tracer))
337        .map_err(|_| OtelError::TracerInit("Global tracer already initialized".to_string()))?;
338    Ok(())
339}
340
341/// Get the global tracer
342pub fn get_tracer() -> Option<&'static OtelTracer> {
343    GLOBAL_TRACER.get().and_then(|t| t.as_ref())
344}
345
346/// Shutdown the global tracer
347pub fn shutdown_tracer() {
348    if let Some(Some(tracer)) = GLOBAL_TRACER.get() {
349        tracer.shutdown();
350    }
351}
352
353// ============================================================================
354// Tests
355// ============================================================================
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample identifiers taken from the W3C Trace Context examples.
    const SAMPLE_TRACE_ID: &str = "0af7651916cd43dd8448eb211c80319c";
    const SAMPLE_SPAN_ID: &str = "b7ad6b7169203331";

    /// A sampled header parses into its three components.
    #[test]
    fn test_parse_valid_traceparent() {
        let parsed = TraceContext::parse_traceparent(
            "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
        )
        .expect("well-formed header should parse");

        assert_eq!(parsed.trace_id, SAMPLE_TRACE_ID);
        assert_eq!(parsed.parent_id, SAMPLE_SPAN_ID);
        assert!(parsed.sampled);
    }

    /// Flags `00` yield an unsampled context.
    #[test]
    fn test_parse_unsampled_traceparent() {
        let parsed = TraceContext::parse_traceparent(
            "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-00",
        )
        .expect("well-formed header should parse");

        assert!(!parsed.sampled);
    }

    /// Unsupported version, missing field, and short trace ID are rejected.
    #[test]
    fn test_parse_invalid_traceparent() {
        let rejected = [
            // Invalid version
            "01-abc-def-00",
            // Wrong number of parts
            "00-abc-def",
            // Wrong trace_id length
            "00-abc-b7ad6b7169203331-01",
        ];

        for header in rejected {
            assert!(
                TraceContext::parse_traceparent(header).is_none(),
                "expected {:?} to be rejected",
                header
            );
        }
    }

    /// The outgoing header reuses the trace ID and swaps in the new span ID.
    #[test]
    fn test_trace_context_to_traceparent() {
        let ctx = TraceContext {
            trace_id: SAMPLE_TRACE_ID.to_string(),
            parent_id: SAMPLE_SPAN_ID.to_string(),
            sampled: true,
            tracestate: None,
        };

        let rendered = ctx.to_traceparent("1234567890abcdef");

        assert_eq!(
            rendered,
            "00-0af7651916cd43dd8448eb211c80319c-1234567890abcdef-01"
        );
    }

    /// Generated trace IDs are 32 hex characters.
    #[test]
    fn test_generate_trace_id() {
        let generated = generate_trace_id();
        assert_eq!(generated.len(), 32);
        assert!(generated.chars().all(|ch| ch.is_ascii_hexdigit()));
    }

    /// Generated span IDs are 16 hex characters.
    #[test]
    fn test_generate_span_id() {
        let generated = generate_span_id();
        assert_eq!(generated.len(), 16);
        assert!(generated.chars().all(|ch| ch.is_ascii_hexdigit()));
    }

    /// The free function renders all four fields in order.
    #[test]
    fn test_create_traceparent() {
        let rendered = create_traceparent(SAMPLE_TRACE_ID, SAMPLE_SPAN_ID, true);
        assert_eq!(
            rendered,
            "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"
        );
    }

    /// A new root context has correctly sized IDs and keeps the sampled flag.
    #[test]
    fn test_new_root_trace_context() {
        let root = TraceContext::new_root(true);
        assert_eq!(root.trace_id.len(), 32);
        assert_eq!(root.parent_id.len(), 16);
        assert!(root.sampled);
    }
}