1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::sync::Arc;
9use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
10use tokio::sync::RwLock;
11
// Public submodules of the performance toolkit (optimizers, metrics
// collection, monitoring, caching, and profiling).
pub mod memory_optimizer;
pub mod metrics;
pub mod monitor;
pub mod optimizer;
pub mod phoneme_cache;
pub mod profiler;
pub mod streaming_optimizer;
19
/// Shared, async-guarded map from operation name to its recorded durations.
type TimingsStore = Arc<RwLock<HashMap<String, Vec<Duration>>>>;
22
/// A single point-in-time snapshot of all performance data tracked by the
/// profiler.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Host-level resource usage.
    pub system: SystemMetrics,
    /// Synthesis workload counters and rates.
    pub synthesis: SynthesisMetrics,
    /// Process memory and cache statistics.
    pub memory: MemoryMetrics,
    /// GPU statistics, when a GPU was observed; `None` otherwise.
    pub gpu: Option<GpuMetrics>,
    /// Capture time, in seconds since the Unix epoch.
    pub timestamp: u64,
}
37
/// Host system resource metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemMetrics {
    /// CPU usage as a percentage (the profiler compares it against 30/90%).
    pub cpu_usage: f64,
    /// Memory currently in use; summed with `memory_available` to estimate
    /// total system memory (units presumed bytes — TODO confirm producer).
    pub memory_used: u64,
    /// Memory still available on the host.
    pub memory_available: u64,
    /// Disk read throughput in bytes per second (rendered as MB/s).
    pub disk_read_bps: u64,
    /// Disk write throughput in bytes per second (rendered as MB/s).
    pub disk_write_bps: u64,
    /// Network throughput (by analogy with the disk fields; not consumed in
    /// this file).
    pub network_bps: u64,
    /// Thread count reported by the system.
    pub thread_count: usize,
    /// System load average, where the platform provides one.
    pub load_average: Option<f64>,
}
58
/// Counters and rates describing synthesis workload performance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisMetrics {
    /// Total synthesis operations attempted.
    pub total_operations: u64,
    /// Operations that completed successfully.
    pub successful_operations: u64,
    /// Operations that failed.
    pub failed_operations: u64,
    /// Mean synthesis time per operation, in milliseconds.
    pub avg_synthesis_time_ms: f64,
    /// Total duration of audio produced (units set by the producer —
    /// presumably seconds; TODO confirm).
    pub total_audio_duration: f64,
    /// Real-time factor; the recommendation engine flags averages below 1.0
    /// as slower than real time.
    pub real_time_factor: f64,
    /// Input throughput, in characters per second.
    pub throughput_chars_per_sec: f64,
    /// Number of operations waiting in the synthesis queue.
    pub queue_depth: usize,
    /// Average memory consumed per operation, in megabytes.
    pub memory_per_operation_mb: f64,
}
81
/// Process memory and cache statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryMetrics {
    /// Heap currently in use (units presumed bytes — TODO confirm producer).
    pub heap_used: u64,
    /// Highest memory usage observed.
    pub peak_usage: u64,
    /// Allocation rate.
    pub allocations_per_sec: f64,
    /// Deallocation rate.
    pub deallocations_per_sec: f64,
    /// Number of garbage-collection/reclamation events observed.
    pub gc_events: u64,
    /// Heap fragmentation as a percentage (flagged above 15%).
    pub fragmentation_percent: f64,
    /// Cache hit rate as a percentage (flagged below 70%).
    pub cache_hit_rate: f64,
}
100
/// GPU utilization and health statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuMetrics {
    /// GPU utilization as a percentage (flagged below 30%).
    pub utilization: f64,
    /// GPU memory in use; divided by `memory_total` to form a usage
    /// percentage (flagged above 85%).
    pub memory_used: u64,
    /// Total GPU memory.
    pub memory_total: u64,
    /// GPU temperature (units set by the producer; not consumed here).
    pub temperature: f64,
    /// Power draw (units set by the producer; not consumed here).
    pub power_consumption: f64,
    /// Number of active compute units.
    pub compute_units_active: usize,
    /// Memory bandwidth utilization (not consumed in this file).
    pub memory_bandwidth_util: f64,
}
119
/// A single actionable tuning suggestion produced by the profiler's
/// recommendation engine.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationRecommendation {
    /// Area of the system the suggestion applies to.
    pub category: OptimizationCategory,
    /// Urgency; recommendation lists are sorted by this value, descending
    /// (values 5-9 are produced in this file).
    pub priority: u8,
    /// Human-readable summary of the detected condition.
    pub description: String,
    /// Suggested remediation, as free-form text.
    pub recommendation: String,
    /// Expected benefit, as free-form text.
    pub expected_improvement: String,
    /// Implementation difficulty (values 2-4 are produced in this file).
    pub difficulty: u8,
    /// Estimated relative performance impact (0.0-1.0 as used here).
    pub performance_impact: f64,
}
138
/// The area of the system an [`OptimizationRecommendation`] targets.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OptimizationCategory {
    /// Host memory usage and fragmentation.
    Memory,
    /// CPU utilization.
    Cpu,
    /// GPU availability, utilization, and memory pressure.
    Gpu,
    /// Disk input/output throughput.
    Io,
    /// Network throughput (no producer in this file).
    Network,
    /// Cache sizing and hit rates.
    Caching,
    /// Thread/batch parallelism.
    Parallelization,
    /// Model-level changes such as quantization.
    ModelOptimization,
    /// Configuration tuning (no producer in this file).
    Configuration,
    /// Worker/queue resource allocation.
    ResourceAllocation,
}
163
164pub struct PerformanceProfiler {
166 metrics_history: Arc<RwLock<Vec<PerformanceMetrics>>>,
168 operation_timings: Arc<RwLock<HashMap<String, Vec<Duration>>>>,
170 recommendations: Arc<RwLock<Vec<OptimizationRecommendation>>>,
172 start_time: Instant,
174 enabled: bool,
176 max_history_size: usize,
178}
179
180impl PerformanceProfiler {
181 pub fn new(enabled: bool, max_history_size: usize) -> Self {
183 Self {
184 metrics_history: Arc::new(RwLock::new(Vec::with_capacity(max_history_size))),
185 operation_timings: Arc::new(RwLock::new(HashMap::new())),
186 recommendations: Arc::new(RwLock::new(Vec::new())),
187 start_time: Instant::now(),
188 enabled,
189 max_history_size,
190 }
191 }
192
193 pub async fn record_metrics(&self, metrics: PerformanceMetrics) {
195 if !self.enabled {
196 return;
197 }
198
199 let mut history = self.metrics_history.write().await;
200
201 if history.len() >= self.max_history_size {
203 history.remove(0);
204 }
205
206 history.push(metrics);
207 }
208
209 pub async fn start_operation(&self, operation_name: &str) -> OperationTimer {
211 if !self.enabled {
212 return OperationTimer::disabled();
213 }
214
215 OperationTimer::new(
216 operation_name.to_string(),
217 self.operation_timings.clone(),
218 Instant::now(),
219 )
220 }
221
222 pub async fn get_metrics_summary(&self) -> Option<PerformanceMetrics> {
224 if !self.enabled {
225 return None;
226 }
227
228 let history = self.metrics_history.read().await;
229 history.last().cloned()
230 }
231
232 pub async fn get_timing_stats(&self, operation_name: &str) -> Option<TimingStats> {
234 if !self.enabled {
235 return None;
236 }
237
238 let timings = self.operation_timings.read().await;
239 timings
240 .get(operation_name)
241 .map(|durations| TimingStats::from_durations(durations))
242 }
243
244 pub async fn generate_recommendations(&self) -> Vec<OptimizationRecommendation> {
246 if !self.enabled {
247 return Vec::new();
248 }
249
250 let mut recommendations = Vec::new();
251 let history = self.metrics_history.read().await;
252
253 if history.is_empty() {
254 return recommendations;
255 }
256
257 let recent_metrics = &history[history.len().saturating_sub(10)..];
259
260 self.check_memory_optimizations(&mut recommendations, recent_metrics)
262 .await;
263
264 self.check_cpu_optimizations(&mut recommendations, recent_metrics)
266 .await;
267
268 self.check_gpu_optimizations(&mut recommendations, recent_metrics)
270 .await;
271
272 self.check_io_optimizations(&mut recommendations, recent_metrics)
274 .await;
275
276 recommendations.sort_by(|a, b| b.priority.cmp(&a.priority));
278
279 let mut cached_recommendations = self.recommendations.write().await;
281 *cached_recommendations = recommendations.clone();
282
283 recommendations
284 }
285
286 async fn check_memory_optimizations(
288 &self,
289 recommendations: &mut Vec<OptimizationRecommendation>,
290 metrics: &[PerformanceMetrics],
291 ) {
292 let avg_memory_usage = metrics
293 .iter()
294 .map(|m| m.memory.heap_used as f64)
295 .sum::<f64>()
296 / metrics.len() as f64;
297
298 let total_memory = metrics
299 .iter()
300 .map(|m| m.system.memory_used + m.system.memory_available)
301 .max()
302 .unwrap_or(0) as f64;
303
304 let memory_usage_percent = (avg_memory_usage / total_memory) * 100.0;
305
306 if memory_usage_percent > 80.0 {
307 recommendations.push(OptimizationRecommendation {
308 category: OptimizationCategory::Memory,
309 priority: 9,
310 description: format!("High memory usage detected: {:.1}%", memory_usage_percent),
311 recommendation: "Consider enabling memory optimization flags, reducing batch sizes, or using streaming processing for large texts".to_string(),
312 expected_improvement: "20-40% reduction in memory usage".to_string(),
313 difficulty: 2,
314 performance_impact: 0.3,
315 });
316 }
317
318 let avg_fragmentation = metrics
320 .iter()
321 .map(|m| m.memory.fragmentation_percent)
322 .sum::<f64>()
323 / metrics.len() as f64;
324
325 if avg_fragmentation > 15.0 {
326 recommendations.push(OptimizationRecommendation {
327 category: OptimizationCategory::Memory,
328 priority: 6,
329 description: format!("Memory fragmentation detected: {:.1}%", avg_fragmentation),
330 recommendation:
331 "Enable memory pool allocation or restart the application periodically"
332 .to_string(),
333 expected_improvement: "10-20% improvement in memory efficiency".to_string(),
334 difficulty: 3,
335 performance_impact: 0.15,
336 });
337 }
338
339 let avg_cache_hit_rate =
341 metrics.iter().map(|m| m.memory.cache_hit_rate).sum::<f64>() / metrics.len() as f64;
342
343 if avg_cache_hit_rate < 70.0 {
344 recommendations.push(OptimizationRecommendation {
345 category: OptimizationCategory::Caching,
346 priority: 7,
347 description: format!("Low cache hit rate: {:.1}%", avg_cache_hit_rate),
348 recommendation: "Increase cache size, implement more aggressive caching, or use model preloading".to_string(),
349 expected_improvement: "15-30% improvement in synthesis speed".to_string(),
350 difficulty: 3,
351 performance_impact: 0.25,
352 });
353 }
354 }
355
356 async fn check_cpu_optimizations(
358 &self,
359 recommendations: &mut Vec<OptimizationRecommendation>,
360 metrics: &[PerformanceMetrics],
361 ) {
362 let avg_cpu_usage =
363 metrics.iter().map(|m| m.system.cpu_usage).sum::<f64>() / metrics.len() as f64;
364
365 if avg_cpu_usage > 90.0 {
366 recommendations.push(OptimizationRecommendation {
367 category: OptimizationCategory::Cpu,
368 priority: 8,
369 description: format!("High CPU usage detected: {:.1}%", avg_cpu_usage),
370 recommendation: "Enable GPU acceleration, reduce parallel processing threads, or use lower quality settings".to_string(),
371 expected_improvement: "30-50% reduction in CPU usage".to_string(),
372 difficulty: 2,
373 performance_impact: 0.4,
374 });
375 } else if avg_cpu_usage < 30.0 {
376 recommendations.push(OptimizationRecommendation {
377 category: OptimizationCategory::Parallelization,
378 priority: 5,
379 description: format!("Low CPU utilization: {:.1}%", avg_cpu_usage),
380 recommendation: "Increase parallel processing threads or batch size to better utilize available CPU cores".to_string(),
381 expected_improvement: "20-40% improvement in throughput".to_string(),
382 difficulty: 2,
383 performance_impact: 0.3,
384 });
385 }
386
387 let avg_rtf = metrics
389 .iter()
390 .map(|m| m.synthesis.real_time_factor)
391 .sum::<f64>()
392 / metrics.len() as f64;
393
394 if avg_rtf < 1.0 {
395 recommendations.push(OptimizationRecommendation {
396 category: OptimizationCategory::ModelOptimization,
397 priority: 8,
398 description: format!("Poor real-time factor: {:.2}x", avg_rtf),
399 recommendation: "Use quantized models, enable GPU acceleration, or reduce quality settings for real-time applications".to_string(),
400 expected_improvement: "Achieve real-time synthesis (>1.0x RTF)".to_string(),
401 difficulty: 4,
402 performance_impact: 0.5,
403 });
404 }
405 }
406
407 async fn check_gpu_optimizations(
409 &self,
410 recommendations: &mut Vec<OptimizationRecommendation>,
411 metrics: &[PerformanceMetrics],
412 ) {
413 let gpu_available = metrics.iter().any(|m| m.gpu.is_some());
414
415 if !gpu_available {
416 recommendations.push(OptimizationRecommendation {
417 category: OptimizationCategory::Gpu,
418 priority: 6,
419 description: "GPU acceleration not detected".to_string(),
420 recommendation: "Enable GPU acceleration if available, or consider using cloud GPU instances for large workloads".to_string(),
421 expected_improvement: "2-10x improvement in synthesis speed".to_string(),
422 difficulty: 3,
423 performance_impact: 0.8,
424 });
425 return;
426 }
427
428 let gpu_metrics: Vec<&GpuMetrics> = metrics.iter().filter_map(|m| m.gpu.as_ref()).collect();
430
431 if !gpu_metrics.is_empty() {
432 let avg_gpu_utilization =
433 gpu_metrics.iter().map(|g| g.utilization).sum::<f64>() / gpu_metrics.len() as f64;
434
435 if avg_gpu_utilization < 30.0 {
436 recommendations.push(OptimizationRecommendation {
437 category: OptimizationCategory::Gpu,
438 priority: 7,
439 description: format!("Low GPU utilization: {:.1}%", avg_gpu_utilization),
440 recommendation: "Increase batch size, use larger models, or enable more GPU-accelerated features".to_string(),
441 expected_improvement: "Better GPU utilization and potentially faster processing".to_string(),
442 difficulty: 2,
443 performance_impact: 0.3,
444 });
445 }
446
447 let avg_gpu_memory_usage = gpu_metrics
449 .iter()
450 .map(|g| (g.memory_used as f64 / g.memory_total as f64) * 100.0)
451 .sum::<f64>()
452 / gpu_metrics.len() as f64;
453
454 if avg_gpu_memory_usage > 85.0 {
455 recommendations.push(OptimizationRecommendation {
456 category: OptimizationCategory::Gpu,
457 priority: 8,
458 description: format!("High GPU memory usage: {:.1}%", avg_gpu_memory_usage),
459 recommendation: "Reduce batch size, use model quantization, or enable gradient checkpointing".to_string(),
460 expected_improvement: "Prevent GPU memory overflow and improve stability".to_string(),
461 difficulty: 3,
462 performance_impact: 0.2,
463 });
464 }
465 }
466 }
467
468 async fn check_io_optimizations(
470 &self,
471 recommendations: &mut Vec<OptimizationRecommendation>,
472 metrics: &[PerformanceMetrics],
473 ) {
474 let avg_disk_read =
475 metrics.iter().map(|m| m.system.disk_read_bps).sum::<u64>() / metrics.len() as u64;
476
477 let avg_disk_write =
478 metrics.iter().map(|m| m.system.disk_write_bps).sum::<u64>() / metrics.len() as u64;
479
480 if avg_disk_read > 100_000_000 || avg_disk_write > 100_000_000 {
482 recommendations.push(OptimizationRecommendation {
483 category: OptimizationCategory::Io,
484 priority: 6,
485 description: format!(
486 "High disk I/O: {:.1} MB/s read, {:.1} MB/s write",
487 avg_disk_read as f64 / 1_000_000.0,
488 avg_disk_write as f64 / 1_000_000.0
489 ),
490 recommendation:
491 "Use SSD storage, enable I/O caching, or process files in memory when possible"
492 .to_string(),
493 expected_improvement: "20-50% reduction in I/O bottlenecks".to_string(),
494 difficulty: 3,
495 performance_impact: 0.3,
496 });
497 }
498
499 let avg_queue_depth = metrics
501 .iter()
502 .map(|m| m.synthesis.queue_depth)
503 .sum::<usize>()
504 / metrics.len();
505
506 if avg_queue_depth > 10 {
507 recommendations.push(OptimizationRecommendation {
508 category: OptimizationCategory::ResourceAllocation,
509 priority: 7,
510 description: format!("High operation queue depth: {}", avg_queue_depth),
511 recommendation: "Increase worker threads, enable parallel processing, or optimize resource allocation".to_string(),
512 expected_improvement: "Reduced latency and better throughput".to_string(),
513 difficulty: 2,
514 performance_impact: 0.25,
515 });
516 }
517 }
518
519 pub fn uptime(&self) -> Duration {
521 self.start_time.elapsed()
522 }
523
524 pub fn set_enabled(&mut self, enabled: bool) {
526 self.enabled = enabled;
527 }
528
529 pub async fn clear_history(&self) {
531 let mut history = self.metrics_history.write().await;
532 history.clear();
533
534 let mut timings = self.operation_timings.write().await;
535 timings.clear();
536
537 let mut recommendations = self.recommendations.write().await;
538 recommendations.clear();
539 }
540}
541
/// Measures the wall-clock duration of a single named operation; created by
/// [`PerformanceProfiler::start_operation`] and finished with
/// [`stop`](Self::stop).
pub struct OperationTimer {
    /// Name under which the duration is recorded.
    operation_name: String,
    /// Destination for the recorded duration; `None` for a disabled timer,
    /// which then records nothing on stop.
    timings_store: Option<TimingsStore>,
    /// Instant the operation started.
    start_time: Instant,
}
548
549impl OperationTimer {
550 fn new(operation_name: String, timings_store: TimingsStore, start_time: Instant) -> Self {
551 Self {
552 operation_name,
553 timings_store: Some(timings_store),
554 start_time,
555 }
556 }
557
558 fn disabled() -> Self {
559 Self {
560 operation_name: String::new(),
561 timings_store: None,
562 start_time: Instant::now(),
563 }
564 }
565
566 pub async fn stop(self) -> Duration {
568 let duration = self.start_time.elapsed();
569
570 if let Some(timings_store) = self.timings_store {
571 let mut timings = timings_store.write().await;
572 timings
573 .entry(self.operation_name)
574 .or_insert_with(Vec::new)
575 .push(duration);
576 }
577
578 duration
579 }
580}
581
/// Aggregated timing statistics for one named operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimingStats {
    /// Number of recorded samples.
    pub count: usize,
    /// Mean duration.
    pub average: Duration,
    /// Shortest sample.
    pub minimum: Duration,
    /// Longest sample.
    pub maximum: Duration,
    /// 95th-percentile sample.
    pub p95: Duration,
    /// 99th-percentile sample.
    pub p99: Duration,
    /// Population standard deviation of the samples.
    pub std_dev: Duration,
}
600
601impl TimingStats {
602 fn from_durations(durations: &[Duration]) -> Self {
603 if durations.is_empty() {
604 return Self {
605 count: 0,
606 average: Duration::ZERO,
607 minimum: Duration::ZERO,
608 maximum: Duration::ZERO,
609 p95: Duration::ZERO,
610 p99: Duration::ZERO,
611 std_dev: Duration::ZERO,
612 };
613 }
614
615 let mut sorted = durations.to_vec();
616 sorted.sort();
617
618 let count = sorted.len();
619 let sum: Duration = sorted.iter().sum();
620 let average = sum / count as u32;
621
622 let minimum = sorted[0];
623 let maximum = sorted[count - 1];
624
625 let p95_index = (count as f64 * 0.95) as usize;
626 let p99_index = (count as f64 * 0.99) as usize;
627 let p95 = sorted[p95_index.min(count - 1)];
628 let p99 = sorted[p99_index.min(count - 1)];
629
630 let variance: f64 = durations
632 .iter()
633 .map(|d| {
634 let diff = d.as_secs_f64() - average.as_secs_f64();
635 diff * diff
636 })
637 .sum::<f64>()
638 / count as f64;
639
640 let std_dev = Duration::from_secs_f64(variance.sqrt());
641
642 Self {
643 count,
644 average,
645 minimum,
646 maximum,
647 p95,
648 p99,
649 std_dev,
650 }
651 }
652}
653
654impl Default for PerformanceMetrics {
656 fn default() -> Self {
657 Self {
658 system: SystemMetrics::default(),
659 synthesis: SynthesisMetrics::default(),
660 memory: MemoryMetrics::default(),
661 gpu: None,
662 timestamp: SystemTime::now()
663 .duration_since(UNIX_EPOCH)
664 .unwrap_or_default()
665 .as_secs(),
666 }
667 }
668}
669
670impl Default for SystemMetrics {
671 fn default() -> Self {
672 Self {
673 cpu_usage: 0.0,
674 memory_used: 0,
675 memory_available: 0,
676 disk_read_bps: 0,
677 disk_write_bps: 0,
678 network_bps: 0,
679 thread_count: 0,
680 load_average: None,
681 }
682 }
683}
684
685impl Default for SynthesisMetrics {
686 fn default() -> Self {
687 Self {
688 total_operations: 0,
689 successful_operations: 0,
690 failed_operations: 0,
691 avg_synthesis_time_ms: 0.0,
692 total_audio_duration: 0.0,
693 real_time_factor: 0.0,
694 throughput_chars_per_sec: 0.0,
695 queue_depth: 0,
696 memory_per_operation_mb: 0.0,
697 }
698 }
699}
700
701impl Default for MemoryMetrics {
702 fn default() -> Self {
703 Self {
704 heap_used: 0,
705 peak_usage: 0,
706 allocations_per_sec: 0.0,
707 deallocations_per_sec: 0.0,
708 gc_events: 0,
709 fragmentation_percent: 0.0,
710 cache_hit_rate: 0.0,
711 }
712 }
713}
714
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_performance_profiler_creation() {
        let profiler = PerformanceProfiler::new(true, 100);
        assert!(profiler.enabled);
        assert_eq!(profiler.max_history_size, 100);
    }

    #[tokio::test]
    async fn test_metrics_recording() {
        let profiler = PerformanceProfiler::new(true, 10);

        profiler.record_metrics(PerformanceMetrics::default()).await;

        // A recorded snapshot must be visible through the summary accessor.
        assert!(profiler.get_metrics_summary().await.is_some());
    }

    #[tokio::test]
    async fn test_operation_timing() {
        let profiler = PerformanceProfiler::new(true, 10);

        let timer = profiler.start_operation("test_operation").await;
        tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
        let elapsed = timer.stop().await;

        assert!(elapsed >= Duration::from_millis(10));

        let stats = profiler.get_timing_stats("test_operation").await;
        assert!(stats.is_some());
        assert_eq!(stats.unwrap().count, 1);
    }

    #[tokio::test]
    async fn test_timing_stats_calculation() {
        // 100 ms, 200 ms, ..., 500 ms.
        let samples: Vec<Duration> = (1..=5u64).map(|i| Duration::from_millis(i * 100)).collect();

        let stats = TimingStats::from_durations(&samples);

        assert_eq!(stats.count, 5);
        assert_eq!(stats.average, Duration::from_millis(300));
        assert_eq!(stats.minimum, Duration::from_millis(100));
        assert_eq!(stats.maximum, Duration::from_millis(500));
    }

    #[tokio::test]
    async fn test_recommendations_generation() {
        let profiler = PerformanceProfiler::new(true, 10);

        // High CPU and a poor cache hit rate should each trigger a
        // recommendation.
        let metrics = PerformanceMetrics {
            system: SystemMetrics {
                cpu_usage: 95.0,
                ..SystemMetrics::default()
            },
            memory: MemoryMetrics {
                cache_hit_rate: 50.0,
                ..MemoryMetrics::default()
            },
            ..PerformanceMetrics::default()
        };
        profiler.record_metrics(metrics).await;

        let recommendations = profiler.generate_recommendations().await;
        assert!(!recommendations.is_empty());

        assert!(recommendations
            .iter()
            .any(|r| r.category == OptimizationCategory::Cpu));
        assert!(recommendations
            .iter()
            .any(|r| r.category == OptimizationCategory::Caching));
    }

    #[test]
    fn test_optimization_category_serialization() {
        let category = OptimizationCategory::Memory;
        let json = serde_json::to_string(&category).unwrap();
        let round_tripped: OptimizationCategory = serde_json::from_str(&json).unwrap();
        assert_eq!(category, round_tripped);
    }

    #[tokio::test]
    async fn test_disabled_profiler() {
        let profiler = PerformanceProfiler::new(false, 10);

        profiler.record_metrics(PerformanceMetrics::default()).await;

        // A disabled profiler records nothing and recommends nothing.
        assert!(profiler.get_metrics_summary().await.is_none());
        assert!(profiler.generate_recommendations().await.is_empty());
    }
}