chie_core/
tracing.rs

//! OpenTelemetry tracing integration for distributed observability.
//!
//! This module provides OpenTelemetry tracing capabilities for tracking
//! operations across the CHIE protocol, enabling distributed tracing,
//! performance analysis, and debugging in production environments.
//!
//! # Features
//!
//! - **Span Management**: Create and manage tracing spans
//! - **Context Propagation**: Propagate trace context across async boundaries
//! - **Attribute Recording**: Record custom attributes and events
//! - **Multiple Exporters**: Configuration for OTLP, Jaeger, and console exporters
//! - **Sampling**: Configurable sampling strategies
//! - **Performance**: Low-overhead tracing with minimal impact
//!
//! # Example
//!
//! ```rust
//! use chie_core::tracing::{TracingConfig, TracingManager, span_scope};
//! use std::time::Duration;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Initialize tracing
//! let config = TracingConfig::default()
//!     .with_service_name("chie-node")
//!     .with_console_exporter(true);
//!
//! let manager = TracingManager::new(config)?;
//!
//! // Create a traced operation
//! {
//!     let _guard = span_scope("store_chunk");
//!     // Your operation here
//!     std::thread::sleep(Duration::from_millis(10));
//! }
//!
//! // Shut down to flush remaining spans
//! manager.shutdown()?;
//! # Ok(())
//! # }
//! ```
42
43use std::collections::HashMap;
44use std::sync::Arc;
45use std::time::Duration;
46
47/// Configuration for OpenTelemetry tracing.
48#[derive(Debug, Clone)]
49pub struct TracingConfig {
50    /// Service name for tracing.
51    service_name: String,
52    /// Service version.
53    service_version: String,
54    /// Enable console exporter for debugging.
55    console_exporter: bool,
56    /// OTLP endpoint (e.g., "http://localhost:4317").
57    otlp_endpoint: Option<String>,
58    /// Jaeger endpoint (e.g., "http://localhost:14268/api/traces").
59    jaeger_endpoint: Option<String>,
60    /// Sampling rate (0.0 to 1.0, where 1.0 = 100%).
61    sampling_rate: f64,
62    /// Maximum number of attributes per span.
63    max_attributes_per_span: u32,
64    /// Maximum number of events per span.
65    #[allow(dead_code)]
66    max_events_per_span: u32,
67    /// Batch span processor timeout.
68    batch_timeout: Duration,
69    /// Maximum batch size for span export.
70    #[allow(dead_code)]
71    max_batch_size: usize,
72}
73
74impl Default for TracingConfig {
75    #[inline]
76    fn default() -> Self {
77        Self {
78            service_name: "chie-core".to_string(),
79            service_version: env!("CARGO_PKG_VERSION").to_string(),
80            console_exporter: false,
81            otlp_endpoint: None,
82            jaeger_endpoint: None,
83            sampling_rate: 1.0,
84            max_attributes_per_span: 128,
85            max_events_per_span: 128,
86            batch_timeout: Duration::from_secs(5),
87            max_batch_size: 512,
88        }
89    }
90}
91
92impl TracingConfig {
93    /// Creates a new tracing configuration.
94    #[must_use]
95    #[inline]
96    pub fn new() -> Self {
97        Self::default()
98    }
99
100    /// Sets the service name.
101    #[must_use]
102    #[inline]
103    pub fn with_service_name(mut self, name: impl Into<String>) -> Self {
104        self.service_name = name.into();
105        self
106    }
107
108    /// Sets the service version.
109    #[must_use]
110    #[inline]
111    pub fn with_service_version(mut self, version: impl Into<String>) -> Self {
112        self.service_version = version.into();
113        self
114    }
115
116    /// Enables or disables console exporter.
117    #[must_use]
118    #[inline]
119    pub fn with_console_exporter(mut self, enabled: bool) -> Self {
120        self.console_exporter = enabled;
121        self
122    }
123
124    /// Sets the OTLP endpoint.
125    #[must_use]
126    #[inline]
127    pub fn with_otlp_endpoint(mut self, endpoint: impl Into<String>) -> Self {
128        self.otlp_endpoint = Some(endpoint.into());
129        self
130    }
131
132    /// Sets the Jaeger endpoint.
133    #[must_use]
134    #[inline]
135    pub fn with_jaeger_endpoint(mut self, endpoint: impl Into<String>) -> Self {
136        self.jaeger_endpoint = Some(endpoint.into());
137        self
138    }
139
140    /// Sets the sampling rate.
141    #[must_use]
142    #[inline]
143    pub fn with_sampling_rate(mut self, rate: f64) -> Self {
144        self.sampling_rate = rate.clamp(0.0, 1.0);
145        self
146    }
147
148    /// Sets the maximum attributes per span.
149    #[must_use]
150    #[inline]
151    pub fn with_max_attributes_per_span(mut self, max: u32) -> Self {
152        self.max_attributes_per_span = max;
153        self
154    }
155
156    /// Sets the batch timeout.
157    #[must_use]
158    #[inline]
159    pub fn with_batch_timeout(mut self, timeout: Duration) -> Self {
160        self.batch_timeout = timeout;
161        self
162    }
163
164    /// Gets the service name.
165    #[must_use]
166    #[inline]
167    pub fn service_name(&self) -> &str {
168        &self.service_name
169    }
170
171    /// Gets the service version.
172    #[must_use]
173    #[inline]
174    pub fn service_version(&self) -> &str {
175        &self.service_version
176    }
177
178    /// Gets the sampling rate.
179    #[must_use]
180    #[inline]
181    pub const fn sampling_rate(&self) -> f64 {
182        self.sampling_rate
183    }
184
185    /// Checks if console exporter is enabled.
186    #[must_use]
187    #[inline]
188    pub const fn console_exporter_enabled(&self) -> bool {
189        self.console_exporter
190    }
191
192    /// Gets the OTLP endpoint if configured.
193    #[must_use]
194    #[inline]
195    pub fn otlp_endpoint(&self) -> Option<&str> {
196        self.otlp_endpoint.as_deref()
197    }
198
199    /// Gets the Jaeger endpoint if configured.
200    #[must_use]
201    #[inline]
202    pub fn jaeger_endpoint(&self) -> Option<&str> {
203        self.jaeger_endpoint.as_deref()
204    }
205}
206
207/// Tracing manager for initializing and managing OpenTelemetry.
208pub struct TracingManager {
209    config: TracingConfig,
210    initialized: bool,
211}
212
213impl TracingManager {
214    /// Creates a new tracing manager and initializes tracing.
215    pub fn new(config: TracingConfig) -> Result<Self, TracingError> {
216        let mut manager = Self {
217            config,
218            initialized: false,
219        };
220        manager.initialize()?;
221        Ok(manager)
222    }
223
224    /// Initializes the tracing infrastructure.
225    fn initialize(&mut self) -> Result<(), TracingError> {
226        if self.initialized {
227            return Err(TracingError::AlreadyInitialized);
228        }
229
230        // In a real implementation, this would initialize OpenTelemetry
231        // For now, we'll just mark as initialized
232        self.initialized = true;
233        Ok(())
234    }
235
236    /// Shuts down the tracing system and flushes remaining spans.
237    pub fn shutdown(self) -> Result<(), TracingError> {
238        if !self.initialized {
239            return Err(TracingError::NotInitialized);
240        }
241
242        // In a real implementation, this would shutdown OpenTelemetry
243        // and flush any pending spans
244        Ok(())
245    }
246
247    /// Checks if tracing is initialized.
248    #[must_use]
249    #[inline]
250    pub const fn is_initialized(&self) -> bool {
251        self.initialized
252    }
253
254    /// Gets the configuration.
255    #[must_use]
256    #[inline]
257    pub const fn config(&self) -> &TracingConfig {
258        &self.config
259    }
260}
261
/// Errors that can occur during tracing operations.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute.
#[derive(Debug, Clone, thiserror::Error)]
pub enum TracingError {
    /// Tracing is already initialized.
    ///
    /// Returned by `TracingManager::initialize` when the manager's
    /// `initialized` flag is already set.
    #[error("Tracing is already initialized")]
    AlreadyInitialized,
    /// Tracing is not initialized.
    ///
    /// Returned by `TracingManager::shutdown` when the manager was never
    /// initialized.
    #[error("Tracing is not initialized")]
    NotInitialized,
    /// Configuration error; the payload is a human-readable description.
    ///
    /// Also used by `TraceContext::from_traceparent` to report malformed
    /// `traceparent` input.
    #[error("Configuration error: {0}")]
    ConfigError(String),
    /// Export error; the payload is a human-readable description.
    ///
    /// Not constructed anywhere in the code visible in this file.
    #[error("Export error: {0}")]
    ExportError(String),
}
278
279/// Represents a tracing span for an operation.
280#[derive(Debug)]
281pub struct Span {
282    name: String,
283    start_time: std::time::Instant,
284    attributes: HashMap<String, String>,
285    events: Vec<SpanEvent>,
286}
287
288impl Span {
289    /// Creates a new span with the given name.
290    #[must_use]
291    pub fn new(name: impl Into<String>) -> Self {
292        Self {
293            name: name.into(),
294            start_time: std::time::Instant::now(),
295            attributes: HashMap::new(),
296            events: Vec::new(),
297        }
298    }
299
300    /// Adds an attribute to the span.
301    #[inline]
302    pub fn set_attribute(&mut self, key: impl Into<String>, value: impl Into<String>) {
303        self.attributes.insert(key.into(), value.into());
304    }
305
306    /// Records an event on the span.
307    #[inline]
308    pub fn record_event(&mut self, name: impl Into<String>) {
309        self.events.push(SpanEvent {
310            name: name.into(),
311            timestamp: std::time::Instant::now(),
312            attributes: HashMap::new(),
313        });
314    }
315
316    /// Records an event with attributes.
317    #[inline]
318    pub fn record_event_with_attributes(
319        &mut self,
320        name: impl Into<String>,
321        attributes: HashMap<String, String>,
322    ) {
323        self.events.push(SpanEvent {
324            name: name.into(),
325            timestamp: std::time::Instant::now(),
326            attributes,
327        });
328    }
329
330    /// Finishes the span and returns its duration.
331    #[must_use]
332    #[inline]
333    pub fn finish(self) -> Duration {
334        self.start_time.elapsed()
335    }
336
337    /// Gets the span name.
338    #[must_use]
339    #[inline]
340    pub fn name(&self) -> &str {
341        &self.name
342    }
343
344    /// Gets the span's attributes.
345    #[must_use]
346    #[inline]
347    pub fn attributes(&self) -> &HashMap<String, String> {
348        &self.attributes
349    }
350
351    /// Gets the span's events.
352    #[must_use]
353    #[inline]
354    pub fn events(&self) -> &[SpanEvent] {
355        &self.events
356    }
357
358    /// Gets the elapsed time since span start.
359    #[must_use]
360    #[inline]
361    pub fn elapsed(&self) -> Duration {
362        self.start_time.elapsed()
363    }
364}
365
/// A named occurrence recorded on a span, with optional string attributes.
#[derive(Debug, Clone)]
pub struct SpanEvent {
    name: String,
    // Stored when the event is recorded; no accessor reads it in this file.
    #[allow(dead_code)]
    timestamp: std::time::Instant,
    attributes: HashMap<String, String>,
}

impl SpanEvent {
    /// Returns the event's name.
    #[must_use]
    #[inline]
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Returns the attributes attached to the event.
    #[must_use]
    #[inline]
    pub fn attributes(&self) -> &HashMap<String, String> {
        let Self { attributes, .. } = self;
        attributes
    }
}
390
391/// RAII guard for automatic span finish.
392pub struct SpanGuard {
393    span: Option<Span>,
394}
395
396impl SpanGuard {
397    /// Creates a new span guard.
398    #[must_use]
399    #[inline]
400    pub fn new(span: Span) -> Self {
401        Self { span: Some(span) }
402    }
403
404    /// Gets a mutable reference to the span.
405    #[must_use]
406    #[inline]
407    pub fn span_mut(&mut self) -> Option<&mut Span> {
408        self.span.as_mut()
409    }
410}
411
412impl Drop for SpanGuard {
413    fn drop(&mut self) {
414        if let Some(span) = self.span.take() {
415            let _duration = span.finish();
416            // In a real implementation, this would submit the span to OpenTelemetry
417        }
418    }
419}
420
421/// Creates a new span with automatic finish on drop.
422#[must_use]
423#[inline]
424pub fn span_scope(name: impl Into<String>) -> SpanGuard {
425    SpanGuard::new(Span::new(name))
426}
427
428/// Creates a span with attributes.
429#[must_use]
430#[inline]
431pub fn span_with_attributes(
432    name: impl Into<String>,
433    attributes: HashMap<String, String>,
434) -> SpanGuard {
435    let mut span = Span::new(name);
436    for (k, v) in attributes {
437        span.set_attribute(k, v);
438    }
439    SpanGuard::new(span)
440}
441
442/// Trace context for propagating trace information.
443#[derive(Debug, Clone)]
444pub struct TraceContext {
445    trace_id: String,
446    span_id: String,
447    trace_flags: u8,
448}
449
450impl TraceContext {
451    /// Creates a new trace context.
452    #[must_use]
453    pub fn new(trace_id: String, span_id: String, trace_flags: u8) -> Self {
454        Self {
455            trace_id,
456            span_id,
457            trace_flags,
458        }
459    }
460
461    /// Gets the trace ID.
462    #[must_use]
463    #[inline]
464    pub fn trace_id(&self) -> &str {
465        &self.trace_id
466    }
467
468    /// Gets the span ID.
469    #[must_use]
470    #[inline]
471    pub fn span_id(&self) -> &str {
472        &self.span_id
473    }
474
475    /// Gets the trace flags.
476    #[must_use]
477    #[inline]
478    pub const fn trace_flags(&self) -> u8 {
479        self.trace_flags
480    }
481
482    /// Checks if the trace is sampled.
483    #[must_use]
484    #[inline]
485    pub const fn is_sampled(&self) -> bool {
486        self.trace_flags & 0x01 != 0
487    }
488
489    /// Serializes to W3C traceparent format.
490    #[must_use]
491    pub fn to_traceparent(&self) -> String {
492        format!(
493            "00-{}-{}-{:02x}",
494            self.trace_id, self.span_id, self.trace_flags
495        )
496    }
497
498    /// Parses from W3C traceparent format.
499    pub fn from_traceparent(traceparent: &str) -> Result<Self, TracingError> {
500        let parts: Vec<&str> = traceparent.split('-').collect();
501        if parts.len() != 4 || parts[0] != "00" {
502            return Err(TracingError::ConfigError(
503                "Invalid traceparent format".to_string(),
504            ));
505        }
506
507        let trace_flags = u8::from_str_radix(parts[3], 16)
508            .map_err(|_| TracingError::ConfigError("Invalid trace flags".to_string()))?;
509
510        Ok(Self {
511            trace_id: parts[1].to_string(),
512            span_id: parts[2].to_string(),
513            trace_flags,
514        })
515    }
516}
517
/// Counters describing tracing activity.
#[derive(Debug, Clone, Default)]
pub struct TracingStats {
    /// Number of spans created.
    pub total_spans: u64,
    /// Number of spans exported.
    pub exported_spans: u64,
    /// Number of spans dropped.
    pub dropped_spans: u64,
    /// Number of events recorded.
    pub total_events: u64,
    /// Number of attributes recorded.
    pub total_attributes: u64,
}

impl TracingStats {
    /// Returns a statistics value with every counter at zero.
    ///
    /// Usable in `const` contexts, unlike `Default::default`.
    #[must_use]
    #[inline]
    pub const fn new() -> Self {
        Self {
            total_spans: 0,
            exported_spans: 0,
            dropped_spans: 0,
            total_events: 0,
            total_attributes: 0,
        }
    }

    /// Counts one created span.
    #[inline]
    pub fn record_span_created(&mut self) {
        self.total_spans += 1;
    }

    /// Counts one exported span.
    #[inline]
    pub fn record_span_exported(&mut self) {
        self.exported_spans += 1;
    }

    /// Counts one dropped span.
    #[inline]
    pub fn record_span_dropped(&mut self) {
        self.dropped_spans += 1;
    }

    /// Counts one recorded event.
    #[inline]
    pub fn record_event(&mut self) {
        self.total_events += 1;
    }

    /// Counts one recorded attribute.
    #[inline]
    pub fn record_attribute(&mut self) {
        self.total_attributes += 1;
    }

    /// Fraction of created spans that were exported; `0.0` when no span has
    /// been created yet.
    #[must_use]
    #[inline]
    pub fn export_rate(&self) -> f64 {
        self.fraction_of_spans(self.exported_spans)
    }

    /// Fraction of created spans that were dropped; `0.0` when no span has
    /// been created yet.
    #[must_use]
    #[inline]
    pub fn drop_rate(&self) -> f64 {
        self.fraction_of_spans(self.dropped_spans)
    }

    /// Divides `count` by the number of created spans, yielding `0.0` instead
    /// of dividing by zero.
    #[inline]
    fn fraction_of_spans(&self, count: u64) -> f64 {
        match self.total_spans {
            0 => 0.0,
            total => count as f64 / total as f64,
        }
    }
}
599
600/// Global tracing statistics.
601static TRACING_STATS: std::sync::OnceLock<Arc<std::sync::RwLock<TracingStats>>> =
602    std::sync::OnceLock::new();
603
604/// Gets the global tracing statistics.
605#[must_use]
606pub fn get_tracing_stats() -> TracingStats {
607    TRACING_STATS
608        .get_or_init(|| Arc::new(std::sync::RwLock::new(TracingStats::new())))
609        .read()
610        .unwrap()
611        .clone()
612}
613
614/// Resets the global tracing statistics.
615pub fn reset_tracing_stats() {
616    let stats = TRACING_STATS.get_or_init(|| Arc::new(std::sync::RwLock::new(TracingStats::new())));
617    let mut stats_lock = stats.write().unwrap();
618    *stats_lock = TracingStats::new();
619}
620
#[cfg(test)]
mod tests {
    use super::*;

    // Identifiers shared by the trace-context tests.
    const TRACE_ID: &str = "0123456789abcdef0123456789abcdef";
    const SPAN_ID: &str = "0123456789abcdef";

    /// Builds a trace context from the shared identifiers and `flags`.
    fn sample_context(flags: u8) -> TraceContext {
        TraceContext::new(TRACE_ID.to_string(), SPAN_ID.to_string(), flags)
    }

    #[test]
    fn test_tracing_config_default() {
        let config = TracingConfig::default();
        assert_eq!(config.service_name(), "chie-core");
        assert_eq!(config.sampling_rate(), 1.0);
        assert!(!config.console_exporter_enabled());
        assert!(config.otlp_endpoint().is_none());
        assert!(config.jaeger_endpoint().is_none());
    }

    #[test]
    fn test_tracing_config_builder() {
        let config = TracingConfig::new()
            .with_service_name("test-service")
            .with_service_version("1.0.0")
            .with_console_exporter(true)
            .with_sampling_rate(0.5)
            .with_otlp_endpoint("http://localhost:4317")
            .with_jaeger_endpoint("http://localhost:14268/api/traces");

        assert_eq!(config.service_name(), "test-service");
        assert_eq!(config.service_version(), "1.0.0");
        assert!(config.console_exporter_enabled());
        assert_eq!(config.sampling_rate(), 0.5);
        assert_eq!(config.otlp_endpoint(), Some("http://localhost:4317"));
        assert_eq!(
            config.jaeger_endpoint(),
            Some("http://localhost:14268/api/traces")
        );
    }

    #[test]
    fn test_sampling_rate_clamping() {
        let config1 = TracingConfig::new().with_sampling_rate(-0.5);
        assert_eq!(config1.sampling_rate(), 0.0);

        let config2 = TracingConfig::new().with_sampling_rate(1.5);
        assert_eq!(config2.sampling_rate(), 1.0);
    }

    #[test]
    fn test_tracing_manager_initialization() {
        let config = TracingConfig::default().with_service_name("managed");
        let manager = TracingManager::new(config).unwrap();
        assert!(manager.is_initialized());
        // The manager must hand back the configuration it was built with.
        assert_eq!(manager.config().service_name(), "managed");
    }

    #[test]
    fn test_tracing_manager_shutdown() {
        let config = TracingConfig::default();
        let manager = TracingManager::new(config).unwrap();
        assert!(manager.shutdown().is_ok());
    }

    #[test]
    fn test_span_creation() {
        let span = Span::new("test_operation");
        assert_eq!(span.name(), "test_operation");
        assert!(span.attributes().is_empty());
        assert!(span.events().is_empty());
    }

    #[test]
    fn test_span_attributes() {
        let mut span = Span::new("test");
        span.set_attribute("key1", "value1");
        span.set_attribute("key2", "value2");

        assert_eq!(span.attributes().len(), 2);
        assert_eq!(span.attributes().get("key1"), Some(&"value1".to_string()));
        assert_eq!(span.attributes().get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_span_attribute_overwrite() {
        let mut span = Span::new("test");
        span.set_attribute("key", "old");
        span.set_attribute("key", "new");

        assert_eq!(span.attributes().len(), 1);
        assert_eq!(span.attributes().get("key"), Some(&"new".to_string()));
    }

    #[test]
    fn test_span_events() {
        let mut span = Span::new("test");
        span.record_event("event1");
        span.record_event("event2");

        assert_eq!(span.events().len(), 2);
        assert_eq!(span.events()[0].name(), "event1");
        assert_eq!(span.events()[1].name(), "event2");
        assert!(span.events()[0].attributes().is_empty());
    }

    #[test]
    fn test_span_event_with_attributes() {
        let mut span = Span::new("test");
        let mut attrs = HashMap::new();
        attrs.insert("error".to_string(), "true".to_string());
        span.record_event_with_attributes("error_occurred", attrs);

        assert_eq!(span.events().len(), 1);
        assert_eq!(span.events()[0].name(), "error_occurred");
        assert_eq!(
            span.events()[0].attributes().get("error"),
            Some(&"true".to_string())
        );
    }

    #[test]
    fn test_span_duration() {
        let span = Span::new("test");
        std::thread::sleep(Duration::from_millis(10));
        let duration = span.finish();
        assert!(duration >= Duration::from_millis(10));
    }

    #[test]
    fn test_span_guard() {
        let mut guard = SpanGuard::new(Span::new("test"));

        // The guard exposes its span for mutation until it is dropped.
        let span = guard
            .span_mut()
            .expect("guard holds its span until dropped");
        span.set_attribute("key", "value");
        assert_eq!(span.name(), "test");
        assert_eq!(span.attributes().len(), 1);

        // Dropping the guard finishes the span; this must not panic.
        drop(guard);
    }

    #[test]
    fn test_span_scope() {
        let mut guard = span_scope("scoped_operation");
        let name = guard.span_mut().map(|span| span.name().to_string());
        assert_eq!(name, Some("scoped_operation".to_string()));
        // The span is finished automatically when `guard` goes out of scope.
    }

    #[test]
    fn test_span_with_attributes() {
        let mut attrs = HashMap::new();
        attrs.insert("peer".to_string(), "node-1".to_string());
        attrs.insert("chunk".to_string(), "42".to_string());

        let mut guard = span_with_attributes("transfer", attrs.clone());
        let span = guard
            .span_mut()
            .expect("guard holds its span until dropped");
        assert_eq!(span.name(), "transfer");
        assert_eq!(span.attributes(), &attrs);
    }

    #[test]
    fn test_trace_context_creation() {
        let ctx = sample_context(1);

        assert_eq!(ctx.trace_id(), TRACE_ID);
        assert_eq!(ctx.span_id(), SPAN_ID);
        assert_eq!(ctx.trace_flags(), 1);
        assert!(ctx.is_sampled());
    }

    #[test]
    fn test_trace_context_unsampled() {
        let ctx = sample_context(0);

        assert!(!ctx.is_sampled());
        assert!(ctx.to_traceparent().ends_with("-00"));
        // Only the lowest flag bit marks a trace as sampled.
        assert!(!sample_context(0x02).is_sampled());
    }

    #[test]
    fn test_trace_context_traceparent() {
        let ctx = sample_context(1);

        let traceparent = ctx.to_traceparent();
        assert_eq!(
            traceparent,
            "00-0123456789abcdef0123456789abcdef-0123456789abcdef-01"
        );

        let parsed = TraceContext::from_traceparent(&traceparent).unwrap();
        assert_eq!(parsed.trace_id(), ctx.trace_id());
        assert_eq!(parsed.span_id(), ctx.span_id());
        assert_eq!(parsed.trace_flags(), ctx.trace_flags());
    }

    #[test]
    fn test_trace_context_invalid_traceparent() {
        let malformed = [
            "invalid".to_string(),
            String::new(),
            // Unsupported version.
            format!("01-{}-{}-01", TRACE_ID, SPAN_ID),
            // Flags that are not hexadecimal.
            format!("00-{}-{}-zz", TRACE_ID, SPAN_ID),
            // Too few and too many fields.
            format!("00-{}-{}", TRACE_ID, SPAN_ID),
            format!("00-{}-{}-01-extra", TRACE_ID, SPAN_ID),
        ];

        for header in &malformed {
            assert!(
                TraceContext::from_traceparent(header).is_err(),
                "expected {header:?} to be rejected"
            );
        }
    }

    #[test]
    fn test_tracing_stats() {
        let mut stats = TracingStats::new();
        assert_eq!(stats.total_spans, 0);
        assert_eq!(stats.exported_spans, 0);

        stats.record_span_created();
        stats.record_span_created();
        stats.record_span_exported();

        assert_eq!(stats.total_spans, 2);
        assert_eq!(stats.exported_spans, 1);
        assert_eq!(stats.export_rate(), 0.5);
    }

    #[test]
    fn test_tracing_stats_rates() {
        let mut stats = TracingStats::new();
        stats.record_span_created();
        stats.record_span_created();
        stats.record_span_created();
        stats.record_span_exported();
        stats.record_span_dropped();

        assert_eq!(stats.export_rate(), 1.0 / 3.0);
        assert_eq!(stats.drop_rate(), 1.0 / 3.0);
    }

    #[test]
    fn test_tracing_stats_empty_rates() {
        // With no spans created the rates must be zero, not NaN.
        let stats = TracingStats::new();
        assert_eq!(stats.export_rate(), 0.0);
        assert_eq!(stats.drop_rate(), 0.0);
    }

    #[test]
    fn test_tracing_stats_event_and_attribute_counters() {
        let mut stats = TracingStats::new();
        stats.record_event();
        stats.record_attribute();
        stats.record_attribute();

        assert_eq!(stats.total_events, 1);
        assert_eq!(stats.total_attributes, 2);
    }

    #[test]
    fn test_global_stats() {
        reset_tracing_stats();
        let stats = get_tracing_stats();
        assert_eq!(stats.total_spans, 0);
    }
}