Skip to main content

renacer_core/
span_record.rs

1//! Parquet-compatible span record schema (Sprint 40 - Golden Thread Core)
2//!
3//! This module defines the canonical schema for storing OpenTelemetry spans in
4//! trueno-db's Parquet-backed storage. The schema is optimized for:
5//!
6//! - **Query performance:** Flat structure for predicate pushdown
7//! - **Compression:** Columnar layout with RLE/dictionary encoding
8//! - **W3C Trace Context:** Native support for traceparent format
9//! - **Causal ordering:** Lamport logical clock for happens-before
10//!
11//! # Design Principles
12//!
13//! 1. **Flat Structure:** Parquet performs best with flat schemas (no deep nesting)
14//! 2. **Fixed-size IDs:** `trace_id` (16 bytes), `span_id` (8 bytes) for efficient indexing
15//! 3. **JSON Attributes:** Flexible key-value pairs stored as JSON string
16//! 4. **Timestamp Precision:** Nanosecond precision for microsecond-level tracing
17//! 5. **Logical Causality:** Lamport clock field for provable ordering
18//!
19//! # Parquet Schema Mapping
20//!
21//! ```text
22//! SpanRecord (Rust)              →  Parquet Physical Type
23//! ├─ trace_id: [u8; 16]          →  FIXED_LEN_BYTE_ARRAY(16)
24//! ├─ span_id: [u8; 8]            →  FIXED_LEN_BYTE_ARRAY(8)
25//! ├─ parent_span_id: Option<..>  →  FIXED_LEN_BYTE_ARRAY(8), nullable=true
26//! ├─ span_name: String           →  BYTE_ARRAY (UTF8)
27//! ├─ span_kind: SpanKind         →  INT32 (enum)
28//! ├─ start_time_nanos: u64       →  INT64
29//! ├─ end_time_nanos: u64         →  INT64
30//! ├─ duration_nanos: u64         →  INT64 (computed: end - start, saturating at 0)
31//! ├─ logical_clock: u64          →  INT64 (Lamport timestamp)
32//! ├─ status_code: StatusCode     →  INT32 (enum)
33//! ├─ status_message: String      →  BYTE_ARRAY (UTF8)
34//! ├─ attributes_json: String     →  BYTE_ARRAY (UTF8) - JSON map
35//! ├─ resource_json: String       →  BYTE_ARRAY (UTF8) - JSON map
36//! ├─ process_id: u32             →  INT32
37//! └─ thread_id: u64              →  INT64
38//! ```
39//!
40//! # Query Patterns
41//!
42//! The schema is optimized for these access patterns:
43//!
44//! ```sql
45//! -- Critical path queries (p95 <20ms for 1M spans)
46//! SELECT * FROM spans WHERE trace_id = ?
47//! SELECT * FROM spans WHERE trace_id = ? ORDER BY logical_clock
48//! SELECT * FROM spans WHERE trace_id = ? AND parent_span_id IS NULL
49//!
50//! -- Temporal range queries
51//! SELECT * FROM spans WHERE start_time_nanos BETWEEN ? AND ?
52//!
53//! -- Process/thread filtering
54//! SELECT * FROM spans WHERE process_id = ? AND thread_id = ?
55//!
56//! -- Status filtering (error analysis)
57//! SELECT * FROM spans WHERE status_code = 2 -- ERROR
58//! ```
59//!
60//! # Peer-Reviewed Foundation
61//!
62//! - **Melnik et al. (2010). "Dremel: Interactive Analysis of Web-Scale Datasets." Google.**
63//!   - Finding: Columnar storage with nested encoding enables <1s queries on trillion-row tables
64//!   - Application: Parquet schema optimized for predicate pushdown
65//!
66//! - **Lamb et al. (2012). "The Vertica Analytic Database." VLDB.**
67//!   - Finding: Column-store compression (RLE, dictionary) achieves 10-50× reduction
68//!   - Application: Fixed-size IDs and enums for optimal compression
69
70use serde::{Deserialize, Serialize};
71use std::collections::HashMap;
72
73/// Span record compatible with trueno-db Parquet storage
74///
75/// This is the canonical schema for all spans recorded by renacer. Each span
76/// represents a single operation (syscall, function call, GPU kernel, etc.)
77/// with complete metadata for causal analysis.
78///
79/// # Example
80///
81/// ```
82/// use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
83/// use std::collections::HashMap;
84///
85/// let span = SpanRecord {
86///     trace_id: [0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
87///                0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c],
88///     span_id: [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
89///     parent_span_id: None,
90///     span_name: "read".to_string(),
91///     span_kind: SpanKind::Internal,
92///     start_time_nanos: 1700000000000000000,
93///     end_time_nanos: 1700000000000050000,
94///     duration_nanos: 50000,
95///     logical_clock: 42,
96///     status_code: StatusCode::Ok,
97///     status_message: String::new(),
98///     attributes_json: r#"{"syscall.name":"read","syscall.fd":3,"syscall.bytes":1024}"#.to_string(),
99///     resource_json: r#"{"service.name":"renacer","process.pid":1234}"#.to_string(),
100///     process_id: 1234,
101///     thread_id: 1234,
102/// };
103/// ```
104#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
105pub struct SpanRecord {
106    /// W3C Trace Context trace ID (128-bit / 16 bytes)
107    ///
108    /// Format: 32 hex characters (e.g., `4bf92f3c7b644bf92f3c7b644bf92f3c`)
109    ///
110    /// This is the "golden thread" that links all operations across the entire
111    /// pipeline (Rust binary → transpilation → syscalls).
112    pub trace_id: [u8; 16],
113
114    /// W3C Trace Context span ID (64-bit / 8 bytes)
115    ///
116    /// Format: 16 hex characters (e.g., `00f067aa0ba902b7`)
117    ///
118    /// Uniquely identifies this span within the trace.
119    pub span_id: [u8; 8],
120
121    /// Parent span ID (if this span has a parent)
122    ///
123    /// - `None` indicates this is a root span
124    /// - `Some(id)` indicates this span is a child of another span
125    #[serde(skip_serializing_if = "Option::is_none", default)]
126    pub parent_span_id: Option<[u8; 8]>,
127
128    /// Human-readable span name (e.g., "read", "write", "GPU kernel", "HTTP GET")
129    ///
130    /// Should follow OpenTelemetry semantic conventions:
131    /// - Syscalls: use syscall name (e.g., "read", "write")
132    /// - Functions: use function name (e.g., "`process_request`")
133    /// - HTTP: use "HTTP {method}" (e.g., "HTTP GET")
134    pub span_name: String,
135
136    /// Span kind (internal, server, client, producer, consumer)
137    ///
138    /// Indicates the role of this span in the request flow.
139    pub span_kind: SpanKind,
140
141    /// Start time in nanoseconds since UNIX epoch
142    ///
143    /// This is the **physical timestamp** (subject to clock skew).
144    /// Use `logical_clock` for causal ordering.
145    pub start_time_nanos: u64,
146
147    /// End time in nanoseconds since UNIX epoch
148    ///
149    /// This is the **physical timestamp** (subject to clock skew).
150    /// Use `logical_clock` for causal ordering.
151    pub end_time_nanos: u64,
152
153    /// Span duration in nanoseconds (`end_time` - `start_time`)
154    ///
155    /// This is a computed field for query convenience.
156    pub duration_nanos: u64,
157
158    /// Lamport logical clock timestamp
159    ///
160    /// This provides a **mathematical guarantee** of causal ordering:
161    /// if event A → B (happens-before), then `logical_clock(A) < logical_clock(B)`.
162    ///
163    /// Use this for:
164    /// - Critical path analysis (longest path in causal graph)
165    /// - Detecting race conditions (concurrent events have incomparable clocks)
166    /// - Cross-process ordering (even with clock skew)
167    pub logical_clock: u64,
168
169    /// Span status code (unset, ok, error)
170    pub status_code: StatusCode,
171
172    /// Span status message (empty if OK, error message if ERROR)
173    pub status_message: String,
174
175    /// Span attributes as JSON string
176    ///
177    /// This contains all key-value metadata about the span:
178    /// - Syscall arguments: `{"syscall.name":"read","syscall.fd":3,"syscall.bytes":1024}`
179    /// - File paths: `{"file.path":"/etc/passwd","file.line":42}`
180    /// - HTTP: `{"http.method":"GET","http.url":"https://example.com"}`
181    ///
182    /// Stored as JSON to maintain flat Parquet schema (no nested columns).
183    pub attributes_json: String,
184
185    /// Resource attributes as JSON string
186    ///
187    /// This contains metadata about the execution environment:
188    /// - `{"service.name":"renacer","process.pid":1234,"host.name":"server1"}`
189    ///
190    /// Stored as JSON to maintain flat Parquet schema.
191    pub resource_json: String,
192
193    /// Process ID (for filtering by process)
194    pub process_id: u32,
195
196    /// Thread ID (for filtering by thread)
197    pub thread_id: u64,
198}
199
200/// Span kind (OpenTelemetry semantic convention)
201///
202/// Indicates the role of this span in the distributed trace.
203#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
204#[repr(u8)]
205pub enum SpanKind {
206    /// Internal operation (function call, syscall)
207    #[default]
208    Internal = 0,
209
210    /// Server-side request handling (HTTP server, RPC server)
211    Server = 1,
212
213    /// Client-side request (HTTP client, RPC client)
214    Client = 2,
215
216    /// Producer (message queue producer)
217    Producer = 3,
218
219    /// Consumer (message queue consumer)
220    Consumer = 4,
221}
222
223/// Span status code (OpenTelemetry semantic convention)
224///
225/// Indicates whether the span completed successfully or with an error.
226#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
227#[repr(u8)]
228pub enum StatusCode {
229    /// Default status - span status not set
230    #[default]
231    Unset = 0,
232
233    /// Span completed successfully
234    Ok = 1,
235
236    /// Span completed with an error
237    Error = 2,
238}
239
240impl SpanRecord {
241    /// Create a new `SpanRecord` with computed duration
242    ///
243    /// # Arguments
244    ///
245    /// * `trace_id` - W3C Trace Context trace ID (16 bytes)
246    /// * `span_id` - W3C Trace Context span ID (8 bytes)
247    /// * `parent_span_id` - Parent span ID (None for root spans)
248    /// * `span_name` - Human-readable span name
249    /// * `span_kind` - Span kind (internal, server, client, etc.)
250    /// * `start_time_nanos` - Start time in nanoseconds since UNIX epoch
251    /// * `end_time_nanos` - End time in nanoseconds since UNIX epoch
252    /// * `logical_clock` - Lamport logical clock timestamp
253    /// * `status_code` - Span status code
254    /// * `status_message` - Span status message
255    /// * `attributes` - Span attributes (will be serialized to JSON)
256    /// * `resource` - Resource attributes (will be serialized to JSON)
257    /// * `process_id` - Process ID
258    /// * `thread_id` - Thread ID
259    ///
260    /// # Example
261    ///
262    /// ```
263    /// use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
264    /// use std::collections::HashMap;
265    ///
266    /// let mut attributes = HashMap::new();
267    /// attributes.insert("syscall.name".to_string(), "read".to_string());
268    /// attributes.insert("syscall.fd".to_string(), "3".to_string());
269    ///
270    /// let mut resource = HashMap::new();
271    /// resource.insert("service.name".to_string(), "renacer".to_string());
272    ///
273    /// let span = SpanRecord::new(
274    ///     [0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
275    ///      0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c],
276    ///     [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
277    ///     None,
278    ///     "read".to_string(),
279    ///     SpanKind::Internal,
280    ///     1700000000000000000,
281    ///     1700000000000050000,
282    ///     42,
283    ///     StatusCode::Ok,
284    ///     String::new(),
285    ///     attributes,
286    ///     resource,
287    ///     1234,
288    ///     1234,
289    /// );
290    ///
291    /// assert_eq!(span.duration_nanos, 50000);
292    /// ```
293    #[allow(clippy::too_many_arguments)]
294    pub fn new(
295        trace_id: [u8; 16],
296        span_id: [u8; 8],
297        parent_span_id: Option<[u8; 8]>,
298        span_name: String,
299        span_kind: SpanKind,
300        start_time_nanos: u64,
301        end_time_nanos: u64,
302        logical_clock: u64,
303        status_code: StatusCode,
304        status_message: String,
305        attributes: HashMap<String, String>,
306        resource: HashMap<String, String>,
307        process_id: u32,
308        thread_id: u64,
309    ) -> Self {
310        let duration_nanos = end_time_nanos.saturating_sub(start_time_nanos);
311
312        let attributes_json =
313            serde_json::to_string(&attributes).unwrap_or_else(|_| "{}".to_string());
314
315        let resource_json = serde_json::to_string(&resource).unwrap_or_else(|_| "{}".to_string());
316
317        Self {
318            trace_id,
319            span_id,
320            parent_span_id,
321            span_name,
322            span_kind,
323            start_time_nanos,
324            end_time_nanos,
325            duration_nanos,
326            logical_clock,
327            status_code,
328            status_message,
329            attributes_json,
330            resource_json,
331            process_id,
332            thread_id,
333        }
334    }
335
336    /// Parse attributes from JSON string
337    ///
338    /// # Returns
339    ///
340    /// `HashMap` of attribute key-value pairs, or empty map if parse fails.
341    pub fn parse_attributes(&self) -> HashMap<String, String> {
342        serde_json::from_str(&self.attributes_json).unwrap_or_default()
343    }
344
345    /// Parse resource attributes from JSON string
346    ///
347    /// # Returns
348    ///
349    /// `HashMap` of resource key-value pairs, or empty map if parse fails.
350    pub fn parse_resource(&self) -> HashMap<String, String> {
351        serde_json::from_str(&self.resource_json).unwrap_or_default()
352    }
353
354    /// Get trace ID as hex string (W3C Trace Context format)
355    ///
356    /// # Example
357    ///
358    /// ```
359    /// use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
360    /// use std::collections::HashMap;
361    ///
362    /// let span = SpanRecord::new(
363    ///     [0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
364    ///      0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c],
365    ///     [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
366    ///     None,
367    ///     "test".to_string(),
368    ///     SpanKind::Internal,
369    ///     0, 0, 0,
370    ///     StatusCode::Ok,
371    ///     String::new(),
372    ///     HashMap::new(),
373    ///     HashMap::new(),
374    ///     0, 0,
375    /// );
376    ///
377    /// assert_eq!(span.trace_id_hex(), "4bf92f3c7b644bf92f3c7b644bf92f3c");
378    /// ```
379    pub fn trace_id_hex(&self) -> String {
380        hex::encode(self.trace_id)
381    }
382
383    /// Get span ID as hex string (W3C Trace Context format)
384    pub fn span_id_hex(&self) -> String {
385        hex::encode(self.span_id)
386    }
387
388    /// Get parent span ID as hex string (W3C Trace Context format)
389    pub fn parent_span_id_hex(&self) -> Option<String> {
390        self.parent_span_id.map(hex::encode)
391    }
392
393    /// Check if this is a root span (no parent)
394    pub fn is_root(&self) -> bool {
395        self.parent_span_id.is_none()
396    }
397
398    /// Check if this span represents an error
399    pub fn is_error(&self) -> bool {
400        self.status_code == StatusCode::Error
401    }
402}
403
404// We need the hex crate for trace ID formatting
405// Note: This will be added to Cargo.toml dependencies if not present
406
407// Compile-time thread-safety verification (Sprint 59)
408static_assertions::assert_impl_all!(SpanRecord: Send, Sync);
409static_assertions::assert_impl_all!(SpanKind: Send, Sync);
410static_assertions::assert_impl_all!(StatusCode: Send, Sync);
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Span with fixed IDs and empty metadata where only the timestamps vary.
    ///
    /// Goes through `SpanRecord::new` so the computed duration is exercised.
    fn timed_span(start_time_nanos: u64, end_time_nanos: u64) -> SpanRecord {
        SpanRecord::new(
            [1; 16],
            [2; 8],
            None,
            "test".to_string(),
            SpanKind::Internal,
            start_time_nanos,
            end_time_nanos,
            0,
            StatusCode::Ok,
            String::new(),
            HashMap::new(),
            HashMap::new(),
            0,
            0,
        )
    }

    /// Baseline root span; tests override individual fields with struct-update syntax.
    fn blank_span() -> SpanRecord {
        timed_span(0, 0)
    }

    #[test]
    fn test_span_record_creation() {
        let mut attributes = HashMap::new();
        attributes.insert("syscall.name".to_string(), "read".to_string());
        attributes.insert("syscall.fd".to_string(), "3".to_string());

        let mut resource = HashMap::new();
        resource.insert("service.name".to_string(), "renacer".to_string());

        let span = SpanRecord::new(
            [1; 16],
            [2; 8],
            None,
            "read".to_string(),
            SpanKind::Internal,
            1000,
            2000,
            42,
            StatusCode::Ok,
            String::new(),
            attributes,
            resource,
            1234,
            5678,
        );

        assert_eq!(span.trace_id, [1; 16]);
        assert_eq!(span.span_id, [2; 8]);
        assert_eq!(span.parent_span_id, None);
        assert_eq!(span.span_name, "read");
        assert_eq!(span.span_kind, SpanKind::Internal);
        assert_eq!(span.start_time_nanos, 1000);
        assert_eq!(span.end_time_nanos, 2000);
        assert_eq!(span.duration_nanos, 1000);
        assert_eq!(span.logical_clock, 42);
        assert_eq!(span.status_code, StatusCode::Ok);
        assert_eq!(span.process_id, 1234);
        assert_eq!(span.thread_id, 5678);
    }

    #[test]
    fn test_duration_computation() {
        assert_eq!(timed_span(1000, 3500).duration_nanos, 2500);
    }

    #[test]
    fn test_duration_saturates_when_end_precedes_start() {
        // Inverted timestamps (e.g. clock skew) must yield 0, not wrap or panic.
        assert_eq!(timed_span(2000, 1000).duration_nanos, 0);
        assert_eq!(timed_span(u64::MAX, 0).duration_nanos, 0);
    }

    #[test]
    fn test_attributes_serialization() {
        let mut attributes = HashMap::new();
        attributes.insert("key1".to_string(), "value1".to_string());
        attributes.insert("key2".to_string(), "value2".to_string());

        let span = SpanRecord::new(
            [1; 16],
            [2; 8],
            None,
            "test".to_string(),
            SpanKind::Internal,
            0,
            0,
            0,
            StatusCode::Ok,
            String::new(),
            attributes,
            HashMap::new(),
            0,
            0,
        );

        let parsed = span.parse_attributes();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.get("key1"), Some(&"value1".to_string()));
        assert_eq!(parsed.get("key2"), Some(&"value2".to_string()));
    }

    #[test]
    fn test_resource_serialization() {
        let mut resource = HashMap::new();
        resource.insert("service.name".to_string(), "renacer".to_string());

        let span = SpanRecord::new(
            [1; 16],
            [2; 8],
            None,
            "test".to_string(),
            SpanKind::Internal,
            0,
            0,
            0,
            StatusCode::Ok,
            String::new(),
            HashMap::new(),
            resource,
            0,
            0,
        );

        let parsed = span.parse_resource();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed.get("service.name"), Some(&"renacer".to_string()));
        assert!(span.parse_attributes().is_empty());
    }

    #[test]
    fn test_malformed_json_parses_to_empty_map() {
        let span = SpanRecord {
            attributes_json: "not json".to_string(),
            resource_json: "[]".to_string(),
            ..blank_span()
        };

        assert!(span.parse_attributes().is_empty());
        assert!(span.parse_resource().is_empty());
    }

    #[test]
    fn test_trace_id_hex() {
        let span = SpanRecord {
            trace_id: [
                0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
                0x2f, 0x3c,
            ],
            span_id: [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
            ..blank_span()
        };

        assert_eq!(span.trace_id_hex(), "4bf92f3c7b644bf92f3c7b644bf92f3c");
        assert_eq!(span.span_id_hex(), "00f067aa0ba902b7");
    }

    #[test]
    fn test_parent_span_id_hex() {
        let child = SpanRecord {
            parent_span_id: Some([0xab; 8]),
            ..blank_span()
        };

        assert_eq!(blank_span().parent_span_id_hex(), None);
        assert_eq!(
            child.parent_span_id_hex(),
            Some("abababababababab".to_string())
        );
    }

    #[test]
    fn test_is_root() {
        let root_span = SpanRecord {
            span_name: "root".to_string(),
            ..blank_span()
        };
        let child_span = SpanRecord {
            span_id: [3; 8],
            parent_span_id: Some([2; 8]),
            span_name: "child".to_string(),
            logical_clock: 1,
            ..blank_span()
        };

        assert!(root_span.is_root());
        assert!(!child_span.is_root());
    }

    #[test]
    fn test_is_error() {
        let ok_span = SpanRecord {
            span_name: "ok".to_string(),
            ..blank_span()
        };
        let error_span = SpanRecord {
            span_id: [3; 8],
            span_name: "error".to_string(),
            logical_clock: 1,
            status_code: StatusCode::Error,
            status_message: "Something went wrong".to_string(),
            ..blank_span()
        };

        assert!(!ok_span.is_error());
        assert!(error_span.is_error());
    }

    #[test]
    fn test_json_round_trip() {
        let root = blank_span();
        let child = SpanRecord {
            span_id: [3; 8],
            parent_span_id: Some([2; 8]),
            status_code: StatusCode::Error,
            ..blank_span()
        };

        // A root span omits the parent field entirely when serialized.
        let root_json = serde_json::to_string(&root).expect("root span serializes to JSON");
        assert!(!root_json.contains("parent_span_id"));

        for span in &[root, child] {
            let encoded = serde_json::to_string(span).expect("span serializes to JSON");
            let decoded: SpanRecord =
                serde_json::from_str(&encoded).expect("span deserializes from its own JSON");
            assert_eq!(&decoded, span);
        }
    }

    #[test]
    fn test_span_kind_default() {
        assert_eq!(SpanKind::default(), SpanKind::Internal);
    }

    #[test]
    fn test_status_code_default() {
        assert_eq!(StatusCode::default(), StatusCode::Unset);
    }
}
622
#[cfg(kani)]
mod kani_proofs {
    use super::*;

    /// Prove the duration expression used by `SpanRecord::new` for every pair
    /// of timestamps: it equals `end - start` when `end >= start`, and
    /// saturates to 0 (rather than wrapping) when the timestamps are inverted.
    #[kani::proof]
    fn proof_duration_consistency() {
        let start: u64 = kani::any();
        let end: u64 = kani::any();

        // Same expression as the constructor, with no assumption on ordering.
        let duration = end.saturating_sub(start);

        if end >= start {
            // Cannot overflow: start + (end - start) == end <= u64::MAX.
            kani::assert(start + duration == end, "duration must equal end - start");
        } else {
            kani::assert(
                duration == 0,
                "duration must saturate to 0 when end precedes start",
            );
        }
    }

    /// Prove every value in `0..=4` maps to a `SpanKind` variant whose `u8`
    /// discriminant is that same value.
    #[kani::proof]
    fn proof_span_kind_exhaustive() {
        let kind: u8 = kani::any();
        kani::assume(kind <= 4);
        let result = match kind {
            0 => SpanKind::Internal,
            1 => SpanKind::Server,
            2 => SpanKind::Client,
            3 => SpanKind::Producer,
            _ => SpanKind::Consumer,
        };
        kani::assert(
            result as u8 == kind,
            "SpanKind discriminant must round-trip through u8",
        );
    }

    /// Prove every value in `0..=2` maps to a `StatusCode` variant whose `u8`
    /// discriminant is that same value.
    #[kani::proof]
    fn proof_status_code_exhaustive() {
        let code: u8 = kani::any();
        kani::assume(code <= 2);
        let result = match code {
            0 => StatusCode::Unset,
            1 => StatusCode::Ok,
            _ => StatusCode::Error,
        };
        kani::assert(
            result as u8 == code,
            "StatusCode discriminant must round-trip through u8",
        );
    }
}