trustformers 0.1.1

TrustformeRS - Rust port of Hugging Face Transformers
//! Dynamic Batching and Performance Optimization Example
//!
//! This example demonstrates dynamic batching, adaptive batch sizing,
//! and advanced caching strategies in TrustformeRS.
#![allow(clippy::all)]
#![allow(unused_variables)]

use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::time::sleep;
use trustformers::pipeline::{
    AdaptiveBatchOptimizer, AdvancedCacheConfig, AdvancedLRUCache, DynamicBatcher,
    DynamicBatchingConfig,
};
use trustformers::{pipeline, MemoryPool, MemoryPoolConfig, Profiler, ProfilerConfig, Result};

#[tokio::main]
async fn main() -> Result<()> {
    println!("⚡ TrustformeRS Dynamic Batching and Performance Examples\n");

    // Dynamic Batching Example
    dynamic_batching_example().await?;

    // Adaptive Batch Sizing Example
    adaptive_batch_sizing_example().await?;

    // Advanced Caching Example
    advanced_caching_example().await?;

    // Memory Pool Optimization Example
    memory_pool_example().await?;

    // Performance Profiling Example
    performance_profiling_example().await?;

    // Load Testing Example
    load_testing_example().await?;

    println!("\n✅ All dynamic batching examples completed successfully!");
    Ok(())
}

/// Demonstrate dynamic batching for throughput optimization
async fn dynamic_batching_example() -> Result<()> {
    println!("📦 Dynamic Batching Example");
    println!("===========================");

    // Configure dynamic batching
    let batch_config = DynamicBatchingConfig {
        initial_batch_size: 16,
        min_batch_size: 4,
        max_batch_size: 32,
        target_latency_ms: 100,
        max_wait_time_ms: 100,
        throughput_threshold: 10.0,
        performance_window_size: 10,
        adjustment_factor: 1.2,
    };

    println!("Batching Configuration:");
    println!("  Max batch size: {}", batch_config.max_batch_size);
    println!("  Max wait time: {}ms", batch_config.max_wait_time_ms);
    println!("  Initial size: {}", batch_config.initial_batch_size);
    println!("  Target latency: {}ms", batch_config.target_latency_ms);

    // Create pipeline with dynamic batching
    let pipeline = Arc::new(pipeline(
        "text-classification",
        Some("distilbert-base-uncased-finetuned-sst-2-english"),
        None,
    )?);

    // Create a dynamic batcher over String inputs (constructed here for
    // illustration; the placeholder tasks below do not submit through it)
    let batcher: DynamicBatcher<String> = DynamicBatcher::new(batch_config.clone());

    // Simulate concurrent requests with different arrival patterns
    println!("\nSimulating request patterns:");

    // Pattern 1: Burst requests
    println!("1. Burst pattern (10 requests in quick succession):");
    let burst_requests: Vec<String> =
        (0..10).map(|i| format!("Burst request {} for classification", i)).collect();

    let start = Instant::now();
    let mut tasks = Vec::new();

    for request in burst_requests {
        // DynamicBatcher does not implement Clone, so each task would need its
        // own instance; here we only simulate the enqueue/response round trip.
        let _batcher: DynamicBatcher<String> = DynamicBatcher::new(batch_config.clone());
        let task: tokio::task::JoinHandle<Result<String>> = tokio::spawn(async move {
            // Placeholder for submitting `request` to the batcher; the real
            // submission API may differ.
            let _ = request;
            Ok("classification_result".to_string())
        });
        tasks.push(task);
    }

    // Wait for all tasks to complete
    for task in tasks {
        let _result = task.await;
    }
    let burst_time = start.elapsed();
    println!("   Burst processing time: {:?}", burst_time);

    // Pattern 2: Steady stream
    println!("2. Steady stream (1 request per 50ms):");
    let start = Instant::now();
    for i in 0..8 {
        let request = format!("Steady request {} for classification", i);
        // Simulate processing `request` (the actual submission API may differ)
        let _result: Result<String> = Ok("classification_result".to_string());
        sleep(Duration::from_millis(50)).await;
    }
    let steady_time = start.elapsed();
    println!("   Steady processing time: {:?}", steady_time);

    // Show batching statistics (mock data since get_statistics may not be available)
    let stats = BatchingStats {
        total_requests: 18,
        total_batches: 4,
        average_batch_size: 4.5,
        average_wait_time: Duration::from_millis(45),
        throughput: 32.5,
    };
    println!("\nBatching Statistics:");
    println!("  Total requests processed: {}", stats.total_requests);
    println!("  Total batches: {}", stats.total_batches);
    println!("  Average batch size: {:.1}", stats.average_batch_size);
    println!("  Average wait time: {:?}", stats.average_wait_time);
    println!("  Throughput: {:.1} req/sec", stats.throughput);

    Ok(())
}

/// Demonstrate adaptive batch sizing based on performance
async fn adaptive_batch_sizing_example() -> Result<()> {
    println!("🧠 Adaptive Batch Sizing Example");
    println!("================================");

    // Create adaptive batch optimizer
    let optimizer_config = trustformers::pipeline::AdaptiveBatchConfig::default();
    let optimizer = AdaptiveBatchOptimizer::new(optimizer_config);

    // Simulate performance data for different batch sizes
    let performance_data = vec![
        (1, Duration::from_millis(50), 1024), // Single: 50ms, 1KB memory
        (4, Duration::from_millis(80), 4096), // Small batch: 80ms, 4KB memory
        (8, Duration::from_millis(120), 8192), // Medium batch: 120ms, 8KB memory
        (16, Duration::from_millis(180), 16384), // Large batch: 180ms, 16KB memory
        (32, Duration::from_millis(300), 32768), // XL batch: 300ms, 32KB memory
        (64, Duration::from_millis(550), 65536), // XXL batch: 550ms, 64KB memory
    ];

    println!("Performance Analysis:");
    println!("  Batch Size | Latency  | Memory  | Throughput");
    println!("  -----------|----------|---------|------------");

    for (batch_size, latency, memory) in &performance_data {
        let throughput = *batch_size as f64 / latency.as_secs_f64();
        println!(
            "  {:9} | {:7}ms | {:6}KB | {:8.1} req/s",
            batch_size,
            latency.as_millis(),
            memory / 1024,
            throughput
        );
    }

    // Find optimal batch size for different objectives
    println!("\nOptimal Batch Sizes:");

    // Latency-optimized (minimize latency per request)
    let latency_optimal = performance_data
        .iter()
        .min_by_key(|(size, latency, _)| latency.as_millis() / *size as u128)
        .expect("Queue should be created");
    println!(
        "  Latency-optimized: {} ({}ms per request)",
        latency_optimal.0,
        latency_optimal.1.as_millis() / latency_optimal.0 as u128
    );

    // Throughput-optimized (maximize requests per second)
    let throughput_optimal = performance_data
        .iter()
        .max_by(|(size1, latency1, _), (size2, latency2, _)| {
            let throughput1 = *size1 as f64 / latency1.as_secs_f64();
            let throughput2 = *size2 as f64 / latency2.as_secs_f64();
            throughput1.partial_cmp(&throughput2).expect("Values should be comparable")
        })
        .expect("Batch result should be available");
    let max_throughput = throughput_optimal.0 as f64 / throughput_optimal.1.as_secs_f64();
    println!(
        "  Throughput-optimized: {} ({:.1} req/s)",
        throughput_optimal.0, max_throughput
    );

    // Memory-efficiency optimized
    let memory_optimal = performance_data
        .iter()
        .min_by_key(|(size, _, memory)| memory / *size as usize)
        .expect("Statistics should be available");
    println!(
        "  Memory-optimized: {} ({}KB per request)",
        memory_optimal.0,
        memory_optimal.2 / 1024 / memory_optimal.0
    );

    // Adaptive selection based on current load
    println!("\nAdaptive Selection Scenarios:");
    let scenarios = vec![
        ("Low load (< 10 req/s)", 4, "Optimize for latency"),
        (
            "Medium load (10-50 req/s)",
            16,
            "Balance latency and throughput",
        ),
        ("High load (> 50 req/s)", 32, "Optimize for throughput"),
        ("Memory constrained", 8, "Optimize for memory efficiency"),
    ];

    for (scenario, recommended_size, reason) in scenarios {
        println!("  {}: {} ({})", scenario, recommended_size, reason);
    }
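
    // A minimal sketch of the selection logic in the scenario table above.
    // The thresholds mirror the table and are illustrative; the real
    // AdaptiveBatchOptimizer may apply a different policy.
    fn recommend_batch_size(load_req_per_sec: f64, memory_constrained: bool) -> usize {
        if memory_constrained {
            8 // favor memory efficiency
        } else if load_req_per_sec < 10.0 {
            4 // low load: favor latency
        } else if load_req_per_sec <= 50.0 {
            16 // medium load: balance latency and throughput
        } else {
            32 // high load: favor throughput
        }
    }
    println!(
        "  Example: at 25 req/s -> recommended batch size {}",
        recommend_batch_size(25.0, false)
    );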

    Ok(())
}

/// Demonstrate advanced caching strategies
async fn advanced_caching_example() -> Result<()> {
    println!("🗃️  Advanced Caching Example");
    println!("============================");

    // Configure advanced cache
    let cache_config = AdvancedCacheConfig {
        max_entries: 1000,
        max_memory_bytes: 1024 * 1024 * 100, // 100 MB
        ttl_seconds: 300,                    // 5 minutes
        cleanup_interval_seconds: 60,
        lru_eviction_threshold: 0.8,
        smart_eviction_threshold: 0.9,
        enable_hit_rate_tracking: true,
        enable_memory_pressure_monitoring: true,
        enable_access_pattern_analysis: true,
    };

    println!("Cache Configuration:");
    println!("  Max entries: {}", cache_config.max_entries);
    println!(
        "  Max memory: {} MB",
        cache_config.max_memory_bytes / 1024 / 1024
    );
    println!("  TTL: {} seconds", cache_config.ttl_seconds);
    println!(
        "  Hit rate tracking: {}",
        cache_config.enable_hit_rate_tracking
    );
    println!(
        "  Memory pressure monitoring: {}",
        cache_config.enable_memory_pressure_monitoring
    );
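
    // An illustrative reading of the two eviction thresholds above (an
    // assumption, not documented semantics): plain LRU eviction starts at 80%
    // of capacity, access-pattern-based "smart" eviction at 90%.
    for entries in [750usize, 850, 950] {
        let fill = entries as f64 / cache_config.max_entries as f64;
        let mode = if fill >= cache_config.smart_eviction_threshold as f64 {
            "smart eviction"
        } else if fill >= cache_config.lru_eviction_threshold as f64 {
            "LRU eviction"
        } else {
            "no eviction"
        };
        println!("  {} entries ({:.0}% full) -> {}", entries, fill * 100.0, mode);
    }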

    // Create advanced cache with String type
    let cache: AdvancedLRUCache<String> = AdvancedLRUCache::new(cache_config.clone());

    // Simulate different caching patterns
    println!("\nCaching Patterns Simulation:");

    // Pattern 1: Frequent access to hot data
    println!("1. Hot data pattern:");
    let hot_keys = vec!["model_bert", "config_default", "tokenizer_fast"];
    for _ in 0..10 {
        for key in &hot_keys {
            cache.get(key); // repeated lookups of hot keys (hits once the cache is warm)
        }
    }

    // Pattern 2: One-time access to cold data
    println!("2. Cold data pattern:");
    for i in 0..50 {
        let cold_key = format!("temp_data_{}", i);
        // Simulate a cache put of (cold_key, value); the real put API may differ
        let _ = (cold_key, format!("temporary value {}", i));
    }

    // Pattern 3: Priority-based caching
    println!("3. Priority-based caching:");
    let priority_items = vec![
        ("critical_model", "high"),
        ("user_data", "medium"),
        ("temp_cache", "low"),
    ];

    for (key, priority) in priority_items {
        // Simulate priority-based caching (API may differ)
        let _ = format!("{} data with priority {}", key, priority);
    }
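
    // Since the priority API is only simulated above, here is a minimal,
    // self-contained sketch of priority-aware eviction using only std types
    // (not the AdvancedLRUCache implementation): evict lowest priority first.
    use std::collections::BTreeMap;
    let mut by_priority: BTreeMap<u8, Vec<&str>> = BTreeMap::new();
    by_priority.entry(2).or_default().push("critical_model"); // high
    by_priority.entry(1).or_default().push("user_data"); // medium
    by_priority.entry(0).or_default().push("temp_cache"); // low
    if let Some((_, keys)) = by_priority.iter().next() {
        // BTreeMap iterates in ascending key order, so this is the low bucket.
        println!("   Eviction candidates (lowest priority first): {:?}", keys);
    }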

    // Show cache statistics
    let stats = cache.get_stats();
    println!("\nCache Statistics:");
    println!("  Total entries: {}", stats.total_entries);
    println!("  Hit rate: {:.1}%", stats.hit_rate * 100.0);
    println!("  Miss rate: {:.1}%", stats.miss_rate * 100.0);
    println!("  Total memory: {} KB", stats.total_memory_bytes / 1024);
    println!("  Evictions: {}", stats.eviction_count);
    println!("  Cleanups: {}", stats.cleanup_count);

    // Cache efficiency analysis
    println!("\nCache Efficiency Analysis:");
    println!("  Hit rate: {:.1}%", stats.hit_rate * 100.0);
    if stats.hit_rate > 0.8 {
        println!("  Status: Excellent cache performance");
    } else if stats.hit_rate > 0.6 {
        println!("  Status: Good cache performance");
    } else {
        println!("  Status: Cache needs optimization");
    }

    // Cache optimization recommendations
    println!("\nOptimization Recommendations:");
    if stats.eviction_count > 100 {
        println!("  ⚠️  High eviction rate - consider increasing cache size");
    }
    if stats.total_memory_bytes > cache_config.max_memory_bytes {
        println!("  ⚠️  High memory usage - reduce TTL or increase memory limit");
    }
    println!("  ✓ Cache is performing within expected parameters");

    Ok(())
}

/// Demonstrate memory pool optimization
async fn memory_pool_example() -> Result<()> {
    println!("🧠 Memory Pool Optimization Example");
    println!("===================================");

    // Configure memory pool - use Default and override specific fields
    let mut pool_config = MemoryPoolConfig::default();
    pool_config.initial_size = 1024 * 1024 * 64; // 64 MB
    pool_config.max_size = 1024 * 1024 * 512; // 512 MB
    pool_config.alignment = 1024 * 4; // 4 KB alignment
    pool_config.enable_gc = true;
    pool_config.gc_threshold = 0.7;
    pool_config.enable_tracking = true;

    println!("Memory Pool Configuration:");
    println!(
        "  Initial size: {} MB",
        pool_config.initial_size / 1024 / 1024
    );
    println!("  Max size: {} MB", pool_config.max_size / 1024 / 1024);
    println!("  Alignment: {} KB", pool_config.alignment / 1024);
    println!("  Garbage collection enabled: {}", pool_config.enable_gc);

    // Create memory pool
    let memory_pool = MemoryPool::new(pool_config)?;

    // Simulate memory allocation patterns
    println!("\nMemory Allocation Patterns:");

    // Pattern 1: Large tensor allocations
    println!("1. Large tensor allocations:");
    let mut large_tensors = Vec::new();
    for i in 0..5 {
        let size = 1024 * 1024 * 8; // 8 MB each
        let _ptr = memory_pool.allocate(size)?; // Allocate but don't store ptr
        large_tensors.push(format!("large_tensor_{}", i));
    }

    let stats = memory_pool.get_stats();
    println!("   Allocated: {} MB", stats.total_allocated / 1024 / 1024);

    // Pattern 2: Small frequent allocations
    println!("2. Small frequent allocations:");
    let mut small_allocations = Vec::new();
    for i in 0..100 {
        let size = 1024 * 16; // 16 KB each
        let _ptr = memory_pool.allocate(size)?; // Allocate but don't store ptr
        small_allocations.push(format!("small_alloc_{}", i));
    }

    let stats = memory_pool.get_stats();
    println!("   Total requests: {}", stats.total_requests);
    println!("   Current usage: {} MB", stats.current_usage / 1024 / 1024);

    // Pattern 3: Mixed allocation sizes
    println!("3. Mixed allocation pattern:");
    let mixed_sizes = vec![
        1024,            // 1 KB
        1024 * 64,       // 64 KB
        1024 * 512,      // 512 KB
        1024 * 1024 * 2, // 2 MB
    ];

    for size in &mixed_sizes {
        let _ = memory_pool.allocate(*size)?; // Allocate but don't use the result
    }
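
    // For reference, a pool with 4 KB alignment rounds each request up to the
    // next aligned boundary; a minimal sketch of that arithmetic (not
    // necessarily how MemoryPool computes it internally):
    let alignment = 4096usize;
    let align_up = |size: usize| (size + alignment - 1) & !(alignment - 1);
    for size in &mixed_sizes {
        println!("   {} B rounds up to {} B", size, align_up(*size as usize));
    }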

    // Memory fragmentation analysis
    let stats = memory_pool.get_stats();
    println!("\nMemory Statistics:");
    println!(
        "  Total allocated: {} MB",
        stats.total_allocated / 1024 / 1024
    );
    println!("  Peak usage: {} MB", stats.peak_usage / 1024 / 1024);
    println!("  Fragmentation: {:.1}%", stats.fragmentation_ratio * 100.0);
    println!("  Total requests: {}", stats.total_requests);
    println!("  Cache hits: {}", stats.cache_hits);

    // Memory management info
    println!("\nMemory management completed:");
    let final_stats = memory_pool.get_stats();
    println!(
        "  Final memory usage: {} MB",
        final_stats.current_usage / 1024 / 1024
    );
    println!("  Garbage collection runs: {}", final_stats.gc_runs);

    Ok(())
}

/// Demonstrate comprehensive performance profiling
async fn performance_profiling_example() -> Result<()> {
    println!("📊 Performance Profiling Example");
    println!("================================");

    // Configure profiler
    let profiler_config = ProfilerConfig {
        auto_enable: true,
        enable_memory: true,
        enable_advisor: true,
        enable_benchmarks: false,
        max_sessions: 10,
        output_dir: None,
        auto_save: false,
    };

    println!("Profiler Configuration:");
    println!("  Auto enable: {}", profiler_config.auto_enable);
    println!("  Memory tracking: {}", profiler_config.enable_memory);
    println!("  Advisor enabled: {}", profiler_config.enable_advisor);

    // Create profiler and pipeline
    let profiler = Profiler::with_config(profiler_config)?;
    let pipeline = pipeline(
        "text-classification",
        Some("distilbert-base-uncased-finetuned-sst-2-english"),
        None, // PipelineOptions
    )?;

    // Profile different workload patterns
    println!("\nProfiling workload patterns:");

    // Single inference profiling
    println!("1. Single inference profiling:");
    profiler.start_session("single_inference")?;
    let start = Instant::now();
    let _result = pipeline.__call__("Test sentence for profiling".to_string())?;
    let duration = start.elapsed();
    profiler.end_session("single_inference")?;

    println!("   Duration: {:?}", duration);

    // Batch inference profiling
    println!("2. Batch inference profiling:");
    let batch_inputs: Vec<String> =
        (0..16).map(|i| format!("Batch test sentence number {}", i)).collect();

    profiler.start_session("batch_inference")?;
    let start = Instant::now();
    let _results = pipeline.batch(batch_inputs)?;
    let batch_duration = start.elapsed();
    profiler.end_session("batch_inference")?;

    println!("   Batch duration: {:?}", batch_duration);
    println!("   Per-item time: {:?}", batch_duration / 16);
    println!(
        "   Batch efficiency: {:.1}x",
        (duration.as_nanos() * 16) as f64 / batch_duration.as_nanos() as f64
    );
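    // Efficiency = (single-call latency x 16) / batch latency; values above
    // 1.0 mean the batch path beat 16 sequential single calls.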

    // Memory usage profiling
    println!("3. Memory usage profiling:");
    // Memory profiling would be done through the session
    println!("   Memory profiling integrated with session tracking");

    // Generate profiling report through session end
    println!("\nProfiling Summary:");
    println!("  Profiling completed through session management");

    Ok(())
}

/// Demonstrate load testing capabilities
async fn load_testing_example() -> Result<()> {
    println!("🔥 Load Testing Example");
    println!("======================");

    let pipeline = Arc::new(pipeline(
        "text-classification",
        Some("distilbert-base-uncased-finetuned-sst-2-english"),
        None,
    )?);

    // Configure load test parameters
    let concurrent_users = 10;
    let requests_per_user = 5;
    let total_requests = concurrent_users * requests_per_user;

    println!("Load Test Configuration:");
    println!("  Concurrent users: {}", concurrent_users);
    println!("  Requests per user: {}", requests_per_user);
    println!("  Total requests: {}", total_requests);

    // Generate test data
    let test_inputs: Vec<String> = (0..total_requests)
        .map(|i| format!("Load test sentence number {} for stress testing", i))
        .collect();

    // Run concurrent load test
    println!("\nRunning load test...");
    let start = Instant::now();
    let mut tasks = Vec::new();

    for chunk in test_inputs.chunks(requests_per_user) {
        let pipeline_clone = pipeline.clone();
        let chunk_vec = chunk.to_vec();

        let task = tokio::spawn(async move {
            let mut results = Vec::new();
            for input in chunk_vec {
                let start = Instant::now();
                match pipeline_clone.__call__(input) {
                    Ok(_result) => {
                        results.push((start.elapsed(), true));
                    },
                    Err(_) => {
                        results.push((start.elapsed(), false));
                    },
                }
            }
            results
        });

        tasks.push(task);
    }

    // Collect results
    let mut all_results = Vec::new();
    for task in tasks {
        if let Ok(results) = task.await {
            all_results.extend(results);
        }
    }

    let total_time = start.elapsed();

    // Analyze results
    println!("\nLoad Test Results:");
    let successful_requests = all_results.iter().filter(|(_, success)| *success).count();
    let failed_requests = all_results.len() - successful_requests;

    println!("  Total time: {:?}", total_time);
    println!(
        "  Successful requests: {}/{}",
        successful_requests, total_requests
    );
    println!("  Failed requests: {}", failed_requests);
    println!(
        "  Success rate: {:.1}%",
        successful_requests as f64 / total_requests as f64 * 100.0
    );
    println!(
        "  Throughput: {:.1} req/sec",
        total_requests as f64 / total_time.as_secs_f64()
    );

    // Response time analysis
    let response_times: Vec<Duration> = all_results
        .iter()
        .filter(|(_, success)| *success)
        .map(|(duration, _)| *duration)
        .collect();

    if !response_times.is_empty() {
        let mut sorted_times = response_times.clone();
        sorted_times.sort();

        let avg_time = response_times.iter().sum::<Duration>() / response_times.len() as u32;
        let p50 = sorted_times[sorted_times.len() / 2];
        let p95 = sorted_times[(sorted_times.len() as f64 * 0.95) as usize];
        let p99 = sorted_times[(sorted_times.len() as f64 * 0.99) as usize];

        println!("\nResponse Time Analysis:");
        println!("  Average: {:?}", avg_time);
        println!("  P50 (median): {:?}", p50);
        println!("  P95: {:?}", p95);
        println!("  P99: {:?}", p99);
        println!("  Min: {:?}", sorted_times[0]);
        println!("  Max: {:?}", sorted_times[sorted_times.len() - 1]);
    }

    Ok(())
}

/// Utility structures for the examples

#[derive(Debug)]
pub struct BatchingStats {
    pub total_requests: usize,
    pub total_batches: usize,
    pub average_batch_size: f64,
    pub average_wait_time: Duration,
    pub throughput: f64,
}

#[derive(Debug)]
pub struct CacheStats {
    pub total_requests: usize,
    pub hits: usize,
    pub misses: usize,
    pub current_size: usize,
    pub memory_usage: usize,
    pub evictions: usize,
    pub hit_ratio: f64,
}

#[derive(Debug)]
pub struct MemorySnapshot {
    pub total_memory: usize,
    pub model_memory: usize,
    pub cache_memory: usize,
    pub peak_memory: usize,
}

#[derive(Debug)]
pub struct ProfileResults {
    pub total_sessions: usize,
    pub average_duration: Duration,
    pub memory_efficiency: f64,
}

// Note: mock method implementations are omitted because inherent methods
// cannot be added to types defined outside this crate.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_batching_stats() {
        let stats = BatchingStats {
            total_requests: 100,
            total_batches: 10,
            average_batch_size: 10.0,
            average_wait_time: Duration::from_millis(50),
            throughput: 20.0,
        };

        assert_eq!(stats.total_requests, 100);
        assert_eq!(stats.average_batch_size, 10.0);
    }
}