asupersync 0.3.4

Spec-first, cancel-correct, capability-secure async runtime for Rust.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
//! Trace ID generation high load audit test.
//!
//! **AUDIT SCOPE**: Verifies that trace_id generation under 1M+ spans/sec load
//! uses thread-local or lock-free patterns rather than global mutex bottlenecks.
//!
//! **HIGH LOAD REQUIREMENT**:
//! - 1M+ spans/sec should NOT be bottlenecked by ID generation
//! - Thread-local or atomic counters preferred over global locks
//! - Contention-free ID generation for multi-threaded workloads
//! - NOT: global mutex that serializes all ID generation
//!
//! **CRITICAL**: ID generation bottlenecks can collapse high-throughput
//! observability under load, causing span drops and incomplete traces.

#![cfg(test)]

use crate::observability::context::SpanId as ContextSpanId;
use crate::observability::w3c_trace_context::{SpanId as W3CSpanId, TraceId};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Barrier};
use std::thread;
use std::time::{Duration, Instant};

/// Multi-threaded benchmark harness for ID generation.
///
/// Each run spawns `thread_count` worker threads. Every worker waits on a
/// shared start barrier, generates `ids_per_thread` IDs, and reports how long
/// its own generation loop took.
struct IdGenerationBenchmark {
    /// Number of worker threads spawned per benchmark run.
    thread_count: usize,
    /// Number of IDs each worker thread generates.
    ids_per_thread: usize,
    /// Start line shared by all workers and the coordinating thread.
    start_barrier: Arc<Barrier>,
    /// Incremented once by every worker that finishes its generation loop.
    completion_counter: Arc<AtomicU64>,
}

impl IdGenerationBenchmark {
    /// Creates a benchmark that runs `thread_count` workers generating
    /// `ids_per_thread` IDs each.
    fn new(thread_count: usize, ids_per_thread: usize) -> Self {
        Self {
            thread_count,
            ids_per_thread,
            // One extra participant: the coordinating thread also waits on the
            // barrier so that the overall timer starts at the same moment the
            // workers are released, not at some arbitrary point after spawning.
            start_barrier: Arc::new(Barrier::new(thread_count + 1)),
            completion_counter: Arc::new(AtomicU64::new(0)),
        }
    }

    /// Benchmarks `ContextSpanId::new()` (annotated as the AtomicU64
    /// implementation) and returns the wall-clock time of the whole run.
    fn run_context_span_id_benchmark(&self) -> Duration {
        self.run_benchmark(|| {
            std::hint::black_box(ContextSpanId::new());
        })
    }

    /// Benchmarks `W3CSpanId::new_random()` (annotated as the getrandom
    /// implementation) and returns the wall-clock time of the whole run.
    fn run_w3c_span_id_benchmark(&self) -> Duration {
        self.run_benchmark(|| {
            std::hint::black_box(W3CSpanId::new_random());
        })
    }

    /// Benchmarks `TraceId::new_random()` (annotated as the getrandom
    /// implementation) and returns the wall-clock time of the whole run.
    fn run_trace_id_benchmark(&self) -> Duration {
        self.run_benchmark(|| {
            std::hint::black_box(TraceId::new_random());
        })
    }

    /// Shared driver behind the three public benchmarks.
    ///
    /// Spawns the workers, releases them together with the coordinating
    /// thread, prints one throughput line per worker, and returns the time
    /// elapsed between the barrier release and the last worker being joined.
    ///
    /// `generate_id` is called `ids_per_thread` times on every worker. It is
    /// generic (rather than a `fn()` pointer) so the call can be inlined into
    /// the measured loop.
    ///
    /// # Panics
    ///
    /// Panics if a worker thread panicked.
    fn run_benchmark<F>(&self, generate_id: F) -> Duration
    where
        F: Fn() + Copy + Send + 'static,
    {
        let ids_per_thread = self.ids_per_thread;

        let handles: Vec<_> = (0..self.thread_count)
            .map(|thread_id| {
                let barrier = Arc::clone(&self.start_barrier);
                let counter = Arc::clone(&self.completion_counter);

                thread::spawn(move || {
                    // Wait until every worker and the coordinator are ready.
                    barrier.wait();
                    let start = Instant::now();

                    // Generate IDs at high frequency.
                    for _ in 0..ids_per_thread {
                        generate_id();
                    }

                    let duration = start.elapsed();
                    counter.fetch_add(1, Ordering::Relaxed);
                    (thread_id, duration, ids_per_thread)
                })
            })
            .collect();

        // Release the workers and start the overall clock at the same point.
        self.start_barrier.wait();
        let overall_start = Instant::now();
        let results: Vec<_> = handles
            .into_iter()
            .map(|handle| handle.join().expect("benchmark worker thread panicked"))
            .collect();
        let overall_duration = overall_start.elapsed();

        // Report per-thread performance
        for (thread_id, thread_duration, ids_generated) in results {
            let ids_per_sec = ids_generated as f64 / thread_duration.as_secs_f64();
            println!(
                "   Thread {}: {} IDs in {:?} ({:.0} IDs/sec)",
                thread_id, ids_generated, thread_duration, ids_per_sec
            );
        }

        overall_duration
    }
}

/// **AUDIT TEST**: Profiles the three ID generators under multi-threaded load.
///
/// **SCENARIO**: 8 threads, 125K IDs per thread (1M IDs per generator).
/// **REPORTS**: Overall throughput per generator, the throughput ratio between
/// the context span ID generator and the W3C span ID generator, and whether
/// each generator reached 1M IDs/sec.
/// **NOTE**: The test only prints its findings; it contains no assertions.
#[test]
fn audit_trace_id_generation_high_load_performance() {
    println!("🔍 AUDIT: Trace ID generation performance under 1M+ spans/sec load");

    println!("📋 High load performance requirements:");
    println!("   • No global mutex bottleneck");
    println!("   • Linear scaling with thread count");
    println!("   • Sustained 1M+ IDs/sec generation");
    println!("   • Thread-local or lock-free patterns");

    let thread_count = 8;
    let ids_per_thread = 125_000; // 8 * 125K = 1M total IDs
    let total_ids = thread_count * ids_per_thread;

    // Converts the wall-clock time of one run into IDs per second.
    let throughput = |elapsed: Duration| total_ids as f64 / elapsed.as_secs_f64();

    println!("📊 Benchmark configuration:");
    println!("   Threads: {}", thread_count);
    println!("   IDs per thread: {}", ids_per_thread);
    println!("   Total IDs: {}", total_ids);

    // Run 1: context span IDs
    println!("📊 Benchmark 1: Context SpanId (AtomicU64 implementation)");
    let context_duration =
        IdGenerationBenchmark::new(thread_count, ids_per_thread).run_context_span_id_benchmark();
    let context_rate = throughput(context_duration);
    println!("   Overall: {} IDs in {:?}", total_ids, context_duration);
    println!("   Throughput: {:.0} IDs/sec", context_rate);

    // Run 2: W3C span IDs
    println!("📊 Benchmark 2: W3C SpanId (getrandom implementation)");
    let w3c_span_duration =
        IdGenerationBenchmark::new(thread_count, ids_per_thread).run_w3c_span_id_benchmark();
    let w3c_span_rate = throughput(w3c_span_duration);
    println!("   Overall: {} IDs in {:?}", total_ids, w3c_span_duration);
    println!("   Throughput: {:.0} IDs/sec", w3c_span_rate);

    // Run 3: trace IDs
    println!("📊 Benchmark 3: TraceId (getrandom implementation)");
    let trace_id_duration =
        IdGenerationBenchmark::new(thread_count, ids_per_thread).run_trace_id_benchmark();
    let trace_id_rate = throughput(trace_id_duration);
    println!("   Overall: {} IDs in {:?}", total_ids, trace_id_duration);
    println!("   Throughput: {:.0} IDs/sec", trace_id_rate);

    // Side-by-side summary
    println!("📊 Performance comparison:");
    println!("   Context SpanId: {:.0} IDs/sec (AtomicU64)", context_rate);
    println!("   W3C SpanId: {:.0} IDs/sec (getrandom)", w3c_span_rate);
    println!("   TraceId: {:.0} IDs/sec (getrandom)", trace_id_rate);

    let atomic_advantage = context_rate / w3c_span_rate;
    println!("   Atomic advantage: {:.1}x faster", atomic_advantage);

    // Compare every generator against the 1M IDs/sec requirement.
    let min_required_throughput = 1_000_000.0;
    let verdict = |rate: f64| {
        if rate >= min_required_throughput {
            "✅ MEETS"
        } else {
            "❌ FAILS"
        }
    };

    println!("📊 High load sustainability (1M+ IDs/sec requirement):");
    println!("   Context SpanId: {} (AtomicU64)", verdict(context_rate));
    println!("   W3C SpanId: {} (getrandom)", verdict(w3c_span_rate));
    println!("   TraceId: {} (getrandom)", verdict(trace_id_rate));

    // A ratio above 2x is reported as a sign of contention in the random path.
    let contention_suspected = atomic_advantage > 2.0;
    if contention_suspected {
        println!("⚠️  CONTENTION DETECTED: getrandom shows signs of bottlenecking");
        println!("💡 RECOMMENDATION: Consider thread-local optimization for getrandom calls");
    } else {
        println!("✅ NO MAJOR CONTENTION: Performance difference within acceptable range");
    }

    println!("✅ HIGH LOAD ID GENERATION AUDIT COMPLETE");
}

/// **AUDIT TEST**: Demonstrates thread-local pooling for high-frequency ID generation.
///
/// **SCENARIO**: Compares drawing one span ID (8 bytes) and one trace ID
/// (16 bytes) straight from `getrandom` on every iteration against popping
/// both from thread-local pools that are refilled with one `getrandom` call
/// per batch.
/// **REPORTS**: The elapsed time of both strategies and the resulting speedup.
/// **NOTE**: The test only prints its findings; it does not assert on timing.
#[test]
fn audit_thread_local_id_generation_optimization() {
    println!("🔍 AUDIT: Thread-local ID generation optimization pattern");

    println!("📋 Thread-local optimization strategy:");
    println!("   • Pre-generate ID pools per thread");
    println!("   • Refill pools when exhausted");
    println!("   • Eliminate per-ID getrandom calls");
    println!("   • Maintain cryptographic quality");

    use std::cell::RefCell;

    /// Span IDs produced per pool refill.
    const SPAN_ID_BATCH: usize = 1000;
    /// Trace IDs produced per pool refill.
    const TRACE_ID_BATCH: usize = 100;
    /// Width of one span ID in bytes.
    const SPAN_ID_BYTES: usize = std::mem::size_of::<u64>();
    /// Width of one trace ID in bytes.
    const TRACE_ID_BYTES: usize = 16;

    // Thread-local optimized ID generator
    thread_local! {
        static SPAN_ID_POOL: RefCell<Vec<u64>> = const { RefCell::new(Vec::new()) };
        static TRACE_ID_POOL: RefCell<Vec<[u8; 16]>> = const { RefCell::new(Vec::new()) };
    }

    /// Pops a span ID from the thread-local pool, refilling it when empty.
    fn get_optimized_span_id() -> u64 {
        SPAN_ID_POOL.with(|pool| {
            let mut pool = pool.borrow_mut();
            if pool.is_empty() {
                // One getrandom call covers the whole batch.
                let mut random_bytes = [0u8; SPAN_ID_BATCH * SPAN_ID_BYTES];
                getrandom::fill(&mut random_bytes).expect("getrandom failed");
                pool.extend(random_bytes.chunks_exact(SPAN_ID_BYTES).map(|chunk| {
                    let mut id_bytes = [0u8; SPAN_ID_BYTES];
                    id_bytes.copy_from_slice(chunk);
                    u64::from_ne_bytes(id_bytes)
                }));
            }
            pool.pop().expect("span ID pool is non-empty after refill")
        })
    }

    /// Pops a trace ID from the thread-local pool, refilling it when empty.
    fn get_optimized_trace_id() -> [u8; 16] {
        TRACE_ID_POOL.with(|pool| {
            let mut pool = pool.borrow_mut();
            if pool.is_empty() {
                // One getrandom call covers the whole batch; calling it once
                // per ID here would defeat the purpose of pooling.
                let mut random_bytes = [0u8; TRACE_ID_BATCH * TRACE_ID_BYTES];
                getrandom::fill(&mut random_bytes).expect("getrandom failed");
                pool.extend(random_bytes.chunks_exact(TRACE_ID_BYTES).map(|chunk| {
                    let mut trace_id = [0u8; TRACE_ID_BYTES];
                    trace_id.copy_from_slice(chunk);
                    trace_id
                }));
            }
            pool.pop().expect("trace ID pool is non-empty after refill")
        })
    }

    // Benchmark optimized vs direct generation
    let iterations = 100_000;

    // Direct getrandom calls: one span ID and one trace ID per iteration, so
    // the workload matches the pooled loop below.
    let start = Instant::now();
    for _ in 0..iterations {
        let mut span_bytes = [0u8; SPAN_ID_BYTES];
        getrandom::fill(&mut span_bytes).expect("getrandom failed");
        let mut trace_bytes = [0u8; TRACE_ID_BYTES];
        getrandom::fill(&mut trace_bytes).expect("getrandom failed");
        std::hint::black_box((span_bytes, trace_bytes));
    }
    let direct_duration = start.elapsed();

    // Thread-local optimized calls
    let start = Instant::now();
    for _ in 0..iterations {
        // black_box keeps the optimizer from discarding the generated IDs.
        std::hint::black_box(get_optimized_span_id());
        std::hint::black_box(get_optimized_trace_id());
    }
    let optimized_duration = start.elapsed();

    let speedup = direct_duration.as_secs_f64() / optimized_duration.as_secs_f64();

    println!("📊 Thread-local optimization results:");
    println!(
        "   Direct getrandom: {:?} for {} IDs",
        direct_duration, iterations
    );
    println!(
        "   Thread-local pool: {:?} for {} IDs",
        optimized_duration, iterations
    );
    println!(
        "   Speedup: {:.1}x faster with thread-local pooling",
        speedup
    );

    if speedup > 2.0 {
        println!("✅ THREAD-LOCAL OPTIMIZATION: Significant performance improvement");
        println!("💡 RECOMMENDATION: Consider implementing for high-load scenarios");
    } else {
        println!("📊 THREAD-LOCAL OPTIMIZATION: Marginal improvement");
    }
}

/// **AUDIT TEST**: Measures `OtlpSpan::new` construction throughput.
///
/// **SCENARIO**: Builds 10,000 spans with caller-supplied string IDs and times
/// the loop.
/// **REPORTS**: Spans per second, compared against a 100K spans/sec target.
/// **NOTE**: The test only prints its findings; it contains no assertions.
#[test]
fn audit_otlp_span_id_generation_code_path() {
    use crate::observability::otlp_trace_exporter::OtlpSpan;

    println!("🔍 AUDIT: OTLP span ID generation code path analysis");

    for line in [
        "📋 Code path analysis:",
        "   • OtlpSpan creation",
        "   • ID generation methods",
        "   • Performance characteristics",
    ] {
        println!("{}", line);
    }

    // Time the construction of a batch of spans.
    let timer = Instant::now();
    let span_count = 10_000;

    (0..span_count).for_each(|index| {
        let _span = OtlpSpan::new(
            format!("span-{}", index),
            "test_operation".to_string(),
            1000000000,
            1000001000,
            vec![("test".to_string(), "true".to_string())],
        );
    });

    let creation_duration = timer.elapsed();
    let spans_per_sec = span_count as f64 / creation_duration.as_secs_f64();

    println!("📊 OtlpSpan creation performance:");
    println!("   Created {} spans in {:?}", span_count, creation_duration);
    println!("   Throughput: {:.0} spans/sec", spans_per_sec);

    // Original author's note (not verifiable from this file): OtlpSpan::new()
    // takes the span ID as a caller-provided string, so the actual ID
    // generation happens in whichever layer calls it.
    for line in [
        "📊 ID generation analysis:",
        "   • OtlpSpan::new() uses provided span_id (string)",
        "   • Actual ID generation happens in calling service",
        "   • W3C context for distributed traces",
        "   • Internal context for local spans",
    ] {
        println!("{}", line);
    }

    // 100K spans/sec (10% of 1M target)
    let target_throughput = 100_000.0;
    let verdict = if spans_per_sec >= target_throughput {
        "✅ OTLP SPAN CREATION: Meets high-throughput requirements"
    } else {
        "⚠️  OTLP SPAN CREATION: May need optimization for extreme loads"
    };
    println!("{}", verdict);

    for line in [
        "✅ CODE PATH ANALYSIS COMPLETE",
        "💡 KEY FINDING: ID generation performance depends on which generator is used",
        "   - AtomicU64 (context): Excellent for high-frequency local spans",
        "   - getrandom (W3C): Good for distributed trace initiation",
        "   - Thread-local pooling: Optimization option for extreme loads",
    ] {
        println!("{}", line);
    }
}