renacer_core/span_record.rs
1//! Parquet-compatible span record schema (Sprint 40 - Golden Thread Core)
2//!
3//! This module defines the canonical schema for storing OpenTelemetry spans in
4//! trueno-db's Parquet-backed storage. The schema is optimized for:
5//!
6//! - **Query performance:** Flat structure for predicate pushdown
7//! - **Compression:** Columnar layout with RLE/dictionary encoding
8//! - **W3C Trace Context:** Native support for traceparent format
9//! - **Causal ordering:** Lamport logical clock for happens-before
10//!
11//! # Design Principles
12//!
13//! 1. **Flat Structure:** Parquet performs best with flat schemas (no deep nesting)
14//! 2. **Fixed-size IDs:** `trace_id` (16 bytes), `span_id` (8 bytes) for efficient indexing
15//! 3. **JSON Attributes:** Flexible key-value pairs stored as JSON string
16//! 4. **Timestamp Precision:** Nanosecond precision for microsecond-level tracing
17//! 5. **Logical Causality:** Lamport clock field for provable ordering
18//!
19//! # Parquet Schema Mapping
20//!
21//! ```text
22//! SpanRecord (Rust) → Parquet Physical Type
23//! ├─ trace_id: [u8; 16] → FIXED_LEN_BYTE_ARRAY(16)
24//! ├─ span_id: [u8; 8] → FIXED_LEN_BYTE_ARRAY(8)
25//! ├─ parent_span_id: Option<..> → FIXED_LEN_BYTE_ARRAY(8), nullable=true
26//! ├─ span_name: String → BYTE_ARRAY (UTF8)
27//! ├─ span_kind: SpanKind → INT32 (enum)
28//! ├─ start_time_nanos: u64 → INT64
29//! ├─ end_time_nanos: u64 → INT64
30//! ├─ logical_clock: u64 → INT64 (Lamport timestamp)
31//! ├─ duration_nanos: u64 → INT64 (computed: end - start)
32//! ├─ status_code: StatusCode → INT32 (enum)
33//! ├─ status_message: String → BYTE_ARRAY (UTF8)
34//! ├─ attributes_json: String → BYTE_ARRAY (UTF8) - JSON map
35//! ├─ resource_json: String → BYTE_ARRAY (UTF8) - JSON map
36//! ├─ process_id: u32 → INT32
37//! └─ thread_id: u64 → INT64
38//! ```
39//!
40//! # Query Patterns
41//!
42//! The schema is optimized for these access patterns:
43//!
44//! ```sql
45//! -- Critical path queries (p95 <20ms for 1M spans)
46//! SELECT * FROM spans WHERE trace_id = ?
47//! SELECT * FROM spans WHERE trace_id = ? ORDER BY logical_clock
48//! SELECT * FROM spans WHERE trace_id = ? AND parent_span_id IS NULL
49//!
50//! -- Temporal range queries
51//! SELECT * FROM spans WHERE start_time_nanos BETWEEN ? AND ?
52//!
53//! -- Process/thread filtering
54//! SELECT * FROM spans WHERE process_id = ? AND thread_id = ?
55//!
56//! -- Status filtering (error analysis)
57//! SELECT * FROM spans WHERE status_code = 2 -- ERROR
58//! ```
59//!
60//! # Peer-Reviewed Foundation
61//!
62//! - **Melnik et al. (2010). "Dremel: Interactive Analysis of Web-Scale Datasets." Google.**
63//! - Finding: Columnar storage with nested encoding enables <1s queries on trillion-row tables
64//! - Application: Parquet schema optimized for predicate pushdown
65//!
66//! - **Lamb et al. (2012). "The Vertica Analytic Database." VLDB.**
67//! - Finding: Column-store compression (RLE, dictionary) achieves 10-50× reduction
68//! - Application: Fixed-size IDs and enums for optimal compression
69
70use serde::{Deserialize, Serialize};
71use std::collections::HashMap;
72
73/// Span record compatible with trueno-db Parquet storage
74///
75/// This is the canonical schema for all spans recorded by renacer. Each span
76/// represents a single operation (syscall, function call, GPU kernel, etc.)
77/// with complete metadata for causal analysis.
78///
79/// # Example
80///
81/// ```
82/// use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
83/// use std::collections::HashMap;
84///
85/// let span = SpanRecord {
86/// trace_id: [0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
87/// 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c],
88/// span_id: [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
89/// parent_span_id: None,
90/// span_name: "read".to_string(),
91/// span_kind: SpanKind::Internal,
92/// start_time_nanos: 1700000000000000000,
93/// end_time_nanos: 1700000000000050000,
94/// duration_nanos: 50000,
95/// logical_clock: 42,
96/// status_code: StatusCode::Ok,
97/// status_message: String::new(),
98/// attributes_json: r#"{"syscall.name":"read","syscall.fd":3,"syscall.bytes":1024}"#.to_string(),
99/// resource_json: r#"{"service.name":"renacer","process.pid":1234}"#.to_string(),
100/// process_id: 1234,
101/// thread_id: 1234,
102/// };
103/// ```
104#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
105pub struct SpanRecord {
106 /// W3C Trace Context trace ID (128-bit / 16 bytes)
107 ///
108 /// Format: 32 hex characters (e.g., `4bf92f3c7b644bf92f3c7b644bf92f3c`)
109 ///
110 /// This is the "golden thread" that links all operations across the entire
111 /// pipeline (Rust binary → transpilation → syscalls).
112 pub trace_id: [u8; 16],
113
114 /// W3C Trace Context span ID (64-bit / 8 bytes)
115 ///
116 /// Format: 16 hex characters (e.g., `00f067aa0ba902b7`)
117 ///
118 /// Uniquely identifies this span within the trace.
119 pub span_id: [u8; 8],
120
121 /// Parent span ID (if this span has a parent)
122 ///
123 /// - `None` indicates this is a root span
124 /// - `Some(id)` indicates this span is a child of another span
125 #[serde(skip_serializing_if = "Option::is_none", default)]
126 pub parent_span_id: Option<[u8; 8]>,
127
128 /// Human-readable span name (e.g., "read", "write", "GPU kernel", "HTTP GET")
129 ///
130 /// Should follow OpenTelemetry semantic conventions:
131 /// - Syscalls: use syscall name (e.g., "read", "write")
132 /// - Functions: use function name (e.g., "`process_request`")
133 /// - HTTP: use "HTTP {method}" (e.g., "HTTP GET")
134 pub span_name: String,
135
136 /// Span kind (internal, server, client, producer, consumer)
137 ///
138 /// Indicates the role of this span in the request flow.
139 pub span_kind: SpanKind,
140
141 /// Start time in nanoseconds since UNIX epoch
142 ///
143 /// This is the **physical timestamp** (subject to clock skew).
144 /// Use `logical_clock` for causal ordering.
145 pub start_time_nanos: u64,
146
147 /// End time in nanoseconds since UNIX epoch
148 ///
149 /// This is the **physical timestamp** (subject to clock skew).
150 /// Use `logical_clock` for causal ordering.
151 pub end_time_nanos: u64,
152
153 /// Span duration in nanoseconds (`end_time` - `start_time`)
154 ///
155 /// This is a computed field for query convenience.
156 pub duration_nanos: u64,
157
158 /// Lamport logical clock timestamp
159 ///
160 /// This provides a **mathematical guarantee** of causal ordering:
161 /// if event A → B (happens-before), then `logical_clock(A) < logical_clock(B)`.
162 ///
163 /// Use this for:
164 /// - Critical path analysis (longest path in causal graph)
165 /// - Detecting race conditions (concurrent events have incomparable clocks)
166 /// - Cross-process ordering (even with clock skew)
167 pub logical_clock: u64,
168
169 /// Span status code (unset, ok, error)
170 pub status_code: StatusCode,
171
172 /// Span status message (empty if OK, error message if ERROR)
173 pub status_message: String,
174
175 /// Span attributes as JSON string
176 ///
177 /// This contains all key-value metadata about the span:
178 /// - Syscall arguments: `{"syscall.name":"read","syscall.fd":3,"syscall.bytes":1024}`
179 /// - File paths: `{"file.path":"/etc/passwd","file.line":42}`
180 /// - HTTP: `{"http.method":"GET","http.url":"https://example.com"}`
181 ///
182 /// Stored as JSON to maintain flat Parquet schema (no nested columns).
183 pub attributes_json: String,
184
185 /// Resource attributes as JSON string
186 ///
187 /// This contains metadata about the execution environment:
188 /// - `{"service.name":"renacer","process.pid":1234,"host.name":"server1"}`
189 ///
190 /// Stored as JSON to maintain flat Parquet schema.
191 pub resource_json: String,
192
193 /// Process ID (for filtering by process)
194 pub process_id: u32,
195
196 /// Thread ID (for filtering by thread)
197 pub thread_id: u64,
198}
199
200/// Span kind (OpenTelemetry semantic convention)
201///
202/// Indicates the role of this span in the distributed trace.
203#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
204#[repr(u8)]
205pub enum SpanKind {
206 /// Internal operation (function call, syscall)
207 #[default]
208 Internal = 0,
209
210 /// Server-side request handling (HTTP server, RPC server)
211 Server = 1,
212
213 /// Client-side request (HTTP client, RPC client)
214 Client = 2,
215
216 /// Producer (message queue producer)
217 Producer = 3,
218
219 /// Consumer (message queue consumer)
220 Consumer = 4,
221}
222
223/// Span status code (OpenTelemetry semantic convention)
224///
225/// Indicates whether the span completed successfully or with an error.
226#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
227#[repr(u8)]
228pub enum StatusCode {
229 /// Default status - span status not set
230 #[default]
231 Unset = 0,
232
233 /// Span completed successfully
234 Ok = 1,
235
236 /// Span completed with an error
237 Error = 2,
238}
239
240impl SpanRecord {
241 /// Create a new `SpanRecord` with computed duration
242 ///
243 /// # Arguments
244 ///
245 /// * `trace_id` - W3C Trace Context trace ID (16 bytes)
246 /// * `span_id` - W3C Trace Context span ID (8 bytes)
247 /// * `parent_span_id` - Parent span ID (None for root spans)
248 /// * `span_name` - Human-readable span name
249 /// * `span_kind` - Span kind (internal, server, client, etc.)
250 /// * `start_time_nanos` - Start time in nanoseconds since UNIX epoch
251 /// * `end_time_nanos` - End time in nanoseconds since UNIX epoch
252 /// * `logical_clock` - Lamport logical clock timestamp
253 /// * `status_code` - Span status code
254 /// * `status_message` - Span status message
255 /// * `attributes` - Span attributes (will be serialized to JSON)
256 /// * `resource` - Resource attributes (will be serialized to JSON)
257 /// * `process_id` - Process ID
258 /// * `thread_id` - Thread ID
259 ///
260 /// # Example
261 ///
262 /// ```
263 /// use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
264 /// use std::collections::HashMap;
265 ///
266 /// let mut attributes = HashMap::new();
267 /// attributes.insert("syscall.name".to_string(), "read".to_string());
268 /// attributes.insert("syscall.fd".to_string(), "3".to_string());
269 ///
270 /// let mut resource = HashMap::new();
271 /// resource.insert("service.name".to_string(), "renacer".to_string());
272 ///
273 /// let span = SpanRecord::new(
274 /// [0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
275 /// 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c],
276 /// [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
277 /// None,
278 /// "read".to_string(),
279 /// SpanKind::Internal,
280 /// 1700000000000000000,
281 /// 1700000000000050000,
282 /// 42,
283 /// StatusCode::Ok,
284 /// String::new(),
285 /// attributes,
286 /// resource,
287 /// 1234,
288 /// 1234,
289 /// );
290 ///
291 /// assert_eq!(span.duration_nanos, 50000);
292 /// ```
293 #[allow(clippy::too_many_arguments)]
294 pub fn new(
295 trace_id: [u8; 16],
296 span_id: [u8; 8],
297 parent_span_id: Option<[u8; 8]>,
298 span_name: String,
299 span_kind: SpanKind,
300 start_time_nanos: u64,
301 end_time_nanos: u64,
302 logical_clock: u64,
303 status_code: StatusCode,
304 status_message: String,
305 attributes: HashMap<String, String>,
306 resource: HashMap<String, String>,
307 process_id: u32,
308 thread_id: u64,
309 ) -> Self {
310 let duration_nanos = end_time_nanos.saturating_sub(start_time_nanos);
311
312 let attributes_json =
313 serde_json::to_string(&attributes).unwrap_or_else(|_| "{}".to_string());
314
315 let resource_json = serde_json::to_string(&resource).unwrap_or_else(|_| "{}".to_string());
316
317 Self {
318 trace_id,
319 span_id,
320 parent_span_id,
321 span_name,
322 span_kind,
323 start_time_nanos,
324 end_time_nanos,
325 duration_nanos,
326 logical_clock,
327 status_code,
328 status_message,
329 attributes_json,
330 resource_json,
331 process_id,
332 thread_id,
333 }
334 }
335
336 /// Parse attributes from JSON string
337 ///
338 /// # Returns
339 ///
340 /// `HashMap` of attribute key-value pairs, or empty map if parse fails.
341 pub fn parse_attributes(&self) -> HashMap<String, String> {
342 serde_json::from_str(&self.attributes_json).unwrap_or_default()
343 }
344
345 /// Parse resource attributes from JSON string
346 ///
347 /// # Returns
348 ///
349 /// `HashMap` of resource key-value pairs, or empty map if parse fails.
350 pub fn parse_resource(&self) -> HashMap<String, String> {
351 serde_json::from_str(&self.resource_json).unwrap_or_default()
352 }
353
354 /// Get trace ID as hex string (W3C Trace Context format)
355 ///
356 /// # Example
357 ///
358 /// ```
359 /// use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
360 /// use std::collections::HashMap;
361 ///
362 /// let span = SpanRecord::new(
363 /// [0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
364 /// 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c],
365 /// [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
366 /// None,
367 /// "test".to_string(),
368 /// SpanKind::Internal,
369 /// 0, 0, 0,
370 /// StatusCode::Ok,
371 /// String::new(),
372 /// HashMap::new(),
373 /// HashMap::new(),
374 /// 0, 0,
375 /// );
376 ///
377 /// assert_eq!(span.trace_id_hex(), "4bf92f3c7b644bf92f3c7b644bf92f3c");
378 /// ```
379 pub fn trace_id_hex(&self) -> String {
380 hex::encode(self.trace_id)
381 }
382
383 /// Get span ID as hex string (W3C Trace Context format)
384 pub fn span_id_hex(&self) -> String {
385 hex::encode(self.span_id)
386 }
387
388 /// Get parent span ID as hex string (W3C Trace Context format)
389 pub fn parent_span_id_hex(&self) -> Option<String> {
390 self.parent_span_id.map(hex::encode)
391 }
392
393 /// Check if this is a root span (no parent)
394 pub fn is_root(&self) -> bool {
395 self.parent_span_id.is_none()
396 }
397
398 /// Check if this span represents an error
399 pub fn is_error(&self) -> bool {
400 self.status_code == StatusCode::Error
401 }
402}
403
// `trace_id_hex`, `span_id_hex`, and `parent_span_id_hex` rely on the `hex` crate,
// which must be declared as a dependency in Cargo.toml.
406
// Compile-time thread-safety verification (Sprint 59).
// Each assertion fails the build if the named type stops implementing both
// `Send` and `Sync`, so a later field or variant change cannot silently make
// span data unusable across threads.
static_assertions::assert_impl_all!(SpanRecord: Send, Sync);
static_assertions::assert_impl_all!(SpanKind: Send, Sync);
static_assertions::assert_impl_all!(StatusCode: Send, Sync);
411
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a span through `SpanRecord::new`, varying only the inputs the
    /// individual tests care about; every other argument uses a fixed filler
    /// value (trace ID `[1; 16]`, name `"test"`, kind `Internal`, zeroes).
    fn build_span(
        span_id: [u8; 8],
        parent_span_id: Option<[u8; 8]>,
        start_time_nanos: u64,
        end_time_nanos: u64,
        status_code: StatusCode,
        attributes: HashMap<String, String>,
    ) -> SpanRecord {
        SpanRecord::new(
            [1; 16],
            span_id,
            parent_span_id,
            "test".to_string(),
            SpanKind::Internal,
            start_time_nanos,
            end_time_nanos,
            0,
            status_code,
            String::new(),
            attributes,
            HashMap::new(),
            0,
            0,
        )
    }

    #[test]
    fn test_span_record_creation() {
        let mut attributes = HashMap::new();
        attributes.insert("syscall.name".to_string(), "read".to_string());
        attributes.insert("syscall.fd".to_string(), "3".to_string());

        let mut resource = HashMap::new();
        resource.insert("service.name".to_string(), "renacer".to_string());

        let span = SpanRecord::new(
            [1; 16],
            [2; 8],
            None,
            "read".to_string(),
            SpanKind::Internal,
            1000,
            2000,
            42,
            StatusCode::Ok,
            String::new(),
            attributes,
            resource,
            1234,
            5678,
        );

        assert_eq!(span.trace_id, [1; 16]);
        assert_eq!(span.span_id, [2; 8]);
        assert_eq!(span.parent_span_id, None);
        assert_eq!(span.span_name, "read");
        assert_eq!(span.span_kind, SpanKind::Internal);
        assert_eq!(span.start_time_nanos, 1000);
        assert_eq!(span.end_time_nanos, 2000);
        assert_eq!(span.duration_nanos, 1000);
        assert_eq!(span.logical_clock, 42);
        assert_eq!(span.status_code, StatusCode::Ok);
        assert_eq!(span.status_message, "");
        assert_eq!(span.process_id, 1234);
        assert_eq!(span.thread_id, 5678);
    }

    #[test]
    fn test_duration_computation() {
        let span = build_span([2; 8], None, 1000, 3500, StatusCode::Ok, HashMap::new());

        assert_eq!(span.duration_nanos, 2500);
    }

    #[test]
    fn test_duration_saturates_when_end_precedes_start() {
        // Skewed clocks can report an end before the start; the duration must
        // clamp to zero rather than wrap around.
        let span = build_span([2; 8], None, 2000, 1000, StatusCode::Ok, HashMap::new());

        assert_eq!(span.duration_nanos, 0);
    }

    #[test]
    fn test_attributes_serialization() {
        let mut attributes = HashMap::new();
        attributes.insert("key1".to_string(), "value1".to_string());
        attributes.insert("key2".to_string(), "value2".to_string());

        let span = build_span([2; 8], None, 0, 0, StatusCode::Ok, attributes);

        let parsed = span.parse_attributes();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.get("key1").map(String::as_str), Some("value1"));
        assert_eq!(parsed.get("key2").map(String::as_str), Some("value2"));
    }

    #[test]
    fn test_resource_parsing() {
        let span = SpanRecord {
            resource_json: r#"{"service.name":"renacer"}"#.to_string(),
            ..build_span([2; 8], None, 0, 0, StatusCode::Ok, HashMap::new())
        };

        let parsed = span.parse_resource();
        assert_eq!(parsed.get("service.name").map(String::as_str), Some("renacer"));
    }

    #[test]
    fn test_invalid_json_parses_to_empty_map() {
        let span = SpanRecord {
            attributes_json: "not json".to_string(),
            resource_json: String::new(),
            ..build_span([2; 8], None, 0, 0, StatusCode::Ok, HashMap::new())
        };

        assert!(span.parse_attributes().is_empty());
        assert!(span.parse_resource().is_empty());
    }

    #[test]
    fn test_trace_id_hex() {
        let span = SpanRecord::new(
            [
                0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9, 0x2f, 0x3c, 0x7b, 0x64, 0x4b, 0xf9,
                0x2f, 0x3c,
            ],
            [0x00, 0xf0, 0x67, 0xaa, 0x0b, 0xa9, 0x02, 0xb7],
            None,
            "test".to_string(),
            SpanKind::Internal,
            0,
            0,
            0,
            StatusCode::Ok,
            String::new(),
            HashMap::new(),
            HashMap::new(),
            0,
            0,
        );

        assert_eq!(span.trace_id_hex(), "4bf92f3c7b644bf92f3c7b644bf92f3c");
        assert_eq!(span.span_id_hex(), "00f067aa0ba902b7");
    }

    #[test]
    fn test_parent_span_id_hex() {
        let root_span = build_span([2; 8], None, 0, 0, StatusCode::Ok, HashMap::new());
        let child_span = build_span([3; 8], Some([2; 8]), 0, 0, StatusCode::Ok, HashMap::new());

        assert_eq!(root_span.parent_span_id_hex(), None);
        assert_eq!(
            child_span.parent_span_id_hex().as_deref(),
            Some("0202020202020202")
        );
    }

    #[test]
    fn test_is_root() {
        let root_span = build_span([2; 8], None, 0, 0, StatusCode::Ok, HashMap::new());
        let child_span = build_span([3; 8], Some([2; 8]), 0, 0, StatusCode::Ok, HashMap::new());

        assert!(root_span.is_root());
        assert!(!child_span.is_root());
    }

    #[test]
    fn test_is_error() {
        let ok_span = build_span([2; 8], None, 0, 0, StatusCode::Ok, HashMap::new());
        let error_span = build_span([3; 8], None, 0, 0, StatusCode::Error, HashMap::new());

        assert!(!ok_span.is_error());
        assert!(error_span.is_error());
    }

    #[test]
    fn test_span_kind_default() {
        assert_eq!(SpanKind::default(), SpanKind::Internal);
    }

    #[test]
    fn test_status_code_default() {
        assert_eq!(StatusCode::default(), StatusCode::Unset);
    }
}
622
#[cfg(kani)]
mod kani_proofs {
    use super::*;

    /// Prove the duration rule used by `SpanRecord::new` for all inputs:
    /// `end.saturating_sub(start)` equals `end - start` when the end is not
    /// before the start, and is zero otherwise.
    #[kani::proof]
    fn proof_duration_consistency() {
        let start: u64 = kani::any();
        let end: u64 = kani::any();
        let duration = end.saturating_sub(start);

        if end >= start {
            // `start + duration` cannot overflow here: it equals `end`.
            kani::assert(start + duration == end, "duration must equal end - start");
        } else {
            kani::assert(
                duration == 0,
                "duration must saturate to zero when end precedes start",
            );
        }
    }

    /// Prove every code in `0..=4` maps to the `SpanKind` variant whose
    /// declared discriminant is that same code.
    #[kani::proof]
    fn proof_span_kind_exhaustive() {
        let kind: u8 = kani::any();
        kani::assume(kind <= 4);
        let variant = match kind {
            0 => SpanKind::Internal,
            1 => SpanKind::Server,
            2 => SpanKind::Client,
            3 => SpanKind::Producer,
            _ => SpanKind::Consumer,
        };
        kani::assert(
            variant as u8 == kind,
            "SpanKind discriminant must match its code",
        );
    }

    /// Prove every code in `0..=2` maps to the `StatusCode` variant whose
    /// declared discriminant is that same code.
    #[kani::proof]
    fn proof_status_code_exhaustive() {
        let code: u8 = kani::any();
        kani::assume(code <= 2);
        let variant = match code {
            0 => StatusCode::Unset,
            1 => StatusCode::Ok,
            _ => StatusCode::Error,
        };
        kani::assert(
            variant as u8 == code,
            "StatusCode discriminant must match its code",
        );
    }
}
662}